 meson.build | 4
 meson.options | 2
 src/kosmickrisp/.clang-format | 7
 src/kosmickrisp/bridge/meson.build | 61
 src/kosmickrisp/bridge/mtl_bridge.h | 43
 src/kosmickrisp/bridge/mtl_bridge.m | 50
 src/kosmickrisp/bridge/mtl_buffer.h | 26
 src/kosmickrisp/bridge/mtl_buffer.m | 78
 src/kosmickrisp/bridge/mtl_command_buffer.h | 27
 src/kosmickrisp/bridge/mtl_command_buffer.m | 64
 src/kosmickrisp/bridge/mtl_command_queue.h | 19
 src/kosmickrisp/bridge/mtl_command_queue.m | 28
 src/kosmickrisp/bridge/mtl_compute_state.h | 13
 src/kosmickrisp/bridge/mtl_compute_state.m | 29
 src/kosmickrisp/bridge/mtl_device.h | 40
 src/kosmickrisp/bridge/mtl_device.m | 197
 src/kosmickrisp/bridge/mtl_encoder.h | 152
 src/kosmickrisp/bridge/mtl_encoder.m | 537
 src/kosmickrisp/bridge/mtl_format.h | 205
 src/kosmickrisp/bridge/mtl_heap.h | 30
 src/kosmickrisp/bridge/mtl_heap.m | 83
 src/kosmickrisp/bridge/mtl_library.h | 16
 src/kosmickrisp/bridge/mtl_library.m | 43
 src/kosmickrisp/bridge/mtl_render_state.h | 165
 src/kosmickrisp/bridge/mtl_render_state.m | 475
 src/kosmickrisp/bridge/mtl_sampler.h | 50
 src/kosmickrisp/bridge/mtl_sampler.m | 118
 src/kosmickrisp/bridge/mtl_sync.h | 29
 src/kosmickrisp/bridge/mtl_sync.m | 66
 src/kosmickrisp/bridge/mtl_texture.h | 27
 src/kosmickrisp/bridge/mtl_texture.m | 94
 src/kosmickrisp/bridge/mtl_types.h | 272
 src/kosmickrisp/bridge/stubs/mtl_bridge.c | 24
 src/kosmickrisp/bridge/stubs/mtl_buffer.c | 33
 src/kosmickrisp/bridge/stubs/mtl_command_buffer.c | 35
 src/kosmickrisp/bridge/stubs/mtl_command_queue.c | 19
 src/kosmickrisp/bridge/stubs/mtl_compute_state.c | 14
 src/kosmickrisp/bridge/stubs/mtl_device.c | 73
 src/kosmickrisp/bridge/stubs/mtl_encoder.c | 273
 src/kosmickrisp/bridge/stubs/mtl_heap.c | 37
 src/kosmickrisp/bridge/stubs/mtl_library.c | 19
 src/kosmickrisp/bridge/stubs/mtl_render_state.c | 288
 src/kosmickrisp/bridge/stubs/mtl_sampler.c | 74
 src/kosmickrisp/bridge/stubs/mtl_sync.c | 47
 src/kosmickrisp/bridge/stubs/mtl_texture.c | 29
 src/kosmickrisp/bridge/vk_to_mtl_map.c | 251
 src/kosmickrisp/bridge/vk_to_mtl_map.h | 81
 src/kosmickrisp/compiler/meson.build | 35
 src/kosmickrisp/compiler/msl_iomap.c | 447
 src/kosmickrisp/compiler/msl_nir_algebraic.py | 38
 src/kosmickrisp/compiler/msl_nir_lower_common.c | 255
 src/kosmickrisp/compiler/msl_nir_lower_subgroups.c | 98
 src/kosmickrisp/compiler/msl_private.h | 77
 src/kosmickrisp/compiler/msl_type_inference.c | 857
 src/kosmickrisp/compiler/nir_to_msl.c | 2051
 src/kosmickrisp/compiler/nir_to_msl.h | 56
 src/kosmickrisp/kosmicomp.c | 187
 src/kosmickrisp/meson.build | 16
 src/kosmickrisp/util/kk_dispatch_trampolines_gen.py | 195
 src/kosmickrisp/util/meson.build | 16
 src/kosmickrisp/util/vk_entrypoints.py | 147
 src/kosmickrisp/util/vk_extensions.py | 371
 src/kosmickrisp/vulkan/cl/kk_query.cl | 50
 src/kosmickrisp/vulkan/cl/kk_query.h | 21
 src/kosmickrisp/vulkan/cl/kk_triangle_fan.cl | 283
 src/kosmickrisp/vulkan/kk_bo.c | 70
 src/kosmickrisp/vulkan/kk_bo.h | 32
 src/kosmickrisp/vulkan/kk_buffer.c | 209
 src/kosmickrisp/vulkan/kk_buffer.h | 48
 src/kosmickrisp/vulkan/kk_buffer_view.c | 124
 src/kosmickrisp/vulkan/kk_buffer_view.h | 31
 src/kosmickrisp/vulkan/kk_cmd_buffer.c | 533
 src/kosmickrisp/vulkan/kk_cmd_buffer.h | 270
 src/kosmickrisp/vulkan/kk_cmd_clear.c | 169
 src/kosmickrisp/vulkan/kk_cmd_copy.c | 355
 src/kosmickrisp/vulkan/kk_cmd_dispatch.c | 152
 src/kosmickrisp/vulkan/kk_cmd_draw.c | 1010
 src/kosmickrisp/vulkan/kk_cmd_meta.c | 318
 src/kosmickrisp/vulkan/kk_cmd_pool.c | 64
 src/kosmickrisp/vulkan/kk_cmd_pool.h | 32
 src/kosmickrisp/vulkan/kk_debug.c | 22
 src/kosmickrisp/vulkan/kk_debug.h | 21
 src/kosmickrisp/vulkan/kk_descriptor_set.c | 806
 src/kosmickrisp/vulkan/kk_descriptor_set.h | 81
 src/kosmickrisp/vulkan/kk_descriptor_set_layout.c | 496
 src/kosmickrisp/vulkan/kk_descriptor_set_layout.h | 103
 src/kosmickrisp/vulkan/kk_descriptor_types.h | 45
 src/kosmickrisp/vulkan/kk_device.c | 348
 src/kosmickrisp/vulkan/kk_device.h | 137
 src/kosmickrisp/vulkan/kk_device_lib.c | 191
 src/kosmickrisp/vulkan/kk_device_memory.c | 258
 src/kosmickrisp/vulkan/kk_device_memory.h | 30
 src/kosmickrisp/vulkan/kk_encoder.c | 480
 src/kosmickrisp/vulkan/kk_encoder.h | 125
 src/kosmickrisp/vulkan/kk_event.c | 143
 src/kosmickrisp/vulkan/kk_event.h | 27
 src/kosmickrisp/vulkan/kk_format.c | 359
 src/kosmickrisp/vulkan/kk_format.h | 55
 src/kosmickrisp/vulkan/kk_image.c | 967
 src/kosmickrisp/vulkan/kk_image.h | 155
 src/kosmickrisp/vulkan/kk_image_layout.c | 124
 src/kosmickrisp/vulkan/kk_image_layout.h | 140
 src/kosmickrisp/vulkan/kk_image_view.c | 267
 src/kosmickrisp/vulkan/kk_image_view.h | 57
 src/kosmickrisp/vulkan/kk_instance.c | 225
 src/kosmickrisp/vulkan/kk_instance.h | 26
 src/kosmickrisp/vulkan/kk_nir_lower_descriptors.c | 765
 src/kosmickrisp/vulkan/kk_nir_lower_multiview.c | 113
 src/kosmickrisp/vulkan/kk_nir_lower_textures.c | 193
 src/kosmickrisp/vulkan/kk_nir_lower_vbo.c | 279
 src/kosmickrisp/vulkan/kk_nir_lower_vbo.h | 44
 src/kosmickrisp/vulkan/kk_physical_device.c | 1032
 src/kosmickrisp/vulkan/kk_physical_device.h | 91
 src/kosmickrisp/vulkan/kk_private.h | 95
 src/kosmickrisp/vulkan/kk_query_pool.c | 431
 src/kosmickrisp/vulkan/kk_query_pool.h | 31
 src/kosmickrisp/vulkan/kk_query_table.c | 241
 src/kosmickrisp/vulkan/kk_query_table.h | 57
 src/kosmickrisp/vulkan/kk_queue.c | 114
 src/kosmickrisp/vulkan/kk_queue.h | 42
 src/kosmickrisp/vulkan/kk_sampler.c | 217
 src/kosmickrisp/vulkan/kk_sampler.h | 40
 src/kosmickrisp/vulkan/kk_shader.c | 1278
 src/kosmickrisp/vulkan/kk_shader.h | 85
 src/kosmickrisp/vulkan/kk_sync.c | 106
 src/kosmickrisp/vulkan/kk_sync.h | 23
 src/kosmickrisp/vulkan/kk_wsi.c | 114
 src/kosmickrisp/vulkan/kk_wsi.h | 16
 src/kosmickrisp/vulkan/meson.build | 211
 src/meson.build | 3
 130 files changed, 24271 insertions(+), 2 deletions(-)
diff --git a/meson.build b/meson.build
index ec978d02cdd..80917def523 100644
--- a/meson.build
+++ b/meson.build
@@ -271,7 +271,7 @@ elif _vulkan_drivers.contains('all')
_vulkan_drivers = ['amd', 'intel', 'intel_hasvk', 'swrast',
'freedreno', 'panfrost', 'virtio', 'broadcom',
'imagination', 'microsoft-experimental',
- 'nouveau', 'asahi', 'gfxstream']
+ 'nouveau', 'asahi', 'gfxstream', 'kosmickrisp']
endif
with_intel_vk = _vulkan_drivers.contains('intel')
@@ -288,6 +288,7 @@ with_microsoft_vk = _vulkan_drivers.contains('microsoft-experimental')
with_nouveau_vk = _vulkan_drivers.contains('nouveau')
with_asahi_vk = _vulkan_drivers.contains('asahi')
with_gfxstream_vk = _vulkan_drivers.contains('gfxstream')
+with_kosmickrisp_vk = _vulkan_drivers.contains('kosmickrisp')
with_any_vk = _vulkan_drivers.length() != 0
with_llvm = with_llvm \
@@ -829,6 +830,7 @@ with_driver_using_cl = [
with_gallium_asahi, with_asahi_vk, with_tools.contains('asahi'),
with_gallium_panfrost, with_panfrost_vk,
with_nouveau_vk, with_imagination_vk,
+ with_kosmickrisp_vk,
].contains(true)
if get_option('mesa-clc') == 'system'
diff --git a/meson.options b/meson.options
index b1f98d7452a..75731475c12 100644
--- a/meson.options
+++ b/meson.options
@@ -209,7 +209,7 @@ option(
choices : ['auto', 'amd', 'broadcom', 'freedreno', 'intel', 'intel_hasvk',
'panfrost', 'swrast', 'virtio', 'imagination',
'microsoft-experimental', 'nouveau', 'asahi', 'gfxstream',
- 'all'],
+ 'kosmickrisp', 'all'],
description : 'List of vulkan drivers to build. If this is set to auto ' +
'all drivers applicable to the target OS/architecture ' +
'will be built'
diff --git a/src/kosmickrisp/.clang-format b/src/kosmickrisp/.clang-format
new file mode 100644
index 00000000000..91bc46f7dbf
--- /dev/null
+++ b/src/kosmickrisp/.clang-format
@@ -0,0 +1,7 @@
+BasedOnStyle: InheritParentConfig
+DisableFormat: false
+
+AlignConsecutiveBitFields: true
+ColumnLimit: 80
+BreakStringLiterals: false
+SpaceBeforeParens: ControlStatementsExceptControlMacros
diff --git a/src/kosmickrisp/bridge/meson.build b/src/kosmickrisp/bridge/meson.build
new file mode 100644
index 00000000000..b2f5306bd9b
--- /dev/null
+++ b/src/kosmickrisp/bridge/meson.build
@@ -0,0 +1,61 @@
+# Copyright 2025 LunarG, Inc.
+# Copyright 2025 Google LLC
+# SPDX-License-Identifier: MIT
+
+mtl_bridge_files = files(
+ 'vk_to_mtl_map.h',
+ 'vk_to_mtl_map.c',
+ 'mtl_format.h',
+)
+
+if host_machine.system() == 'darwin'
+ mtl_bridge_files += files(
+ 'mtl_bridge.m',
+ 'mtl_buffer.m',
+ 'mtl_command_buffer.m',
+ 'mtl_command_queue.m',
+ 'mtl_compute_state.m',
+ 'mtl_device.m',
+ 'mtl_encoder.m',
+ 'mtl_heap.m',
+ 'mtl_library.m',
+ 'mtl_render_state.m',
+ 'mtl_sampler.m',
+ 'mtl_sync.m',
+ 'mtl_texture.m',
+ )
+else
+ mtl_bridge_files += files(
+ 'stubs/mtl_bridge.c',
+ 'stubs/mtl_buffer.c',
+ 'stubs/mtl_command_buffer.c',
+ 'stubs/mtl_command_queue.c',
+ 'stubs/mtl_compute_state.c',
+ 'stubs/mtl_device.c',
+ 'stubs/mtl_encoder.c',
+ 'stubs/mtl_heap.c',
+ 'stubs/mtl_library.c',
+ 'stubs/mtl_render_state.c',
+ 'stubs/mtl_sampler.c',
+ 'stubs/mtl_sync.c',
+ 'stubs/mtl_texture.c',
+ )
+endif
+
+mtl_bridge_dependencies = [
+ idep_vulkan_lite_runtime_headers,
+ idep_vulkan_util_headers
+]
+
+libmtl_bridge = static_library(
+ 'mtl_bridge',
+ [mtl_bridge_files],
+ include_directories : [include_directories('../vulkan/'), inc_include, inc_src],
+ dependencies : mtl_bridge_dependencies,
+ gnu_symbol_visibility: 'hidden',
+ build_by_default: false,
+)
+
+idep_mtl_bridge = declare_dependency(
+ link_with : libmtl_bridge,
+)
diff --git a/src/kosmickrisp/bridge/mtl_bridge.h b/src/kosmickrisp/bridge/mtl_bridge.h
new file mode 100644
index 00000000000..bcb8245d42c
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_bridge.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_BRIDGE_H
+#define KK_BRIDGE_H 1
+
+/* C wrappers for Metal. They may not be complete; if you find something you
+ * need, feel free to add it where it belongs. As a rule of thumb, member
+ * functions go in the object's .h/.m/.c. The naming convention for wrappers is:
+ * object_type* mtl_new_object_type(params...);
+ * void mtl_member_function(object_type* ptr, params...);
+ * void mtl_object_set_member(object_type* ptr, member_type value);
+ * member_type mtl_object_get_member(object_type* ptr);
+ *
+ * Objects returned by functions with "new" in the name must be released
+ * via mtl_release(object);
+ * */
+
+#include "mtl_types.h"
+
+#include "mtl_buffer.h"
+#include "mtl_command_buffer.h"
+#include "mtl_command_queue.h"
+#include "mtl_compute_state.h"
+#include "mtl_device.h"
+#include "mtl_encoder.h"
+#include "mtl_format.h"
+#include "mtl_heap.h"
+#include "mtl_library.h"
+#include "mtl_render_state.h"
+#include "mtl_sampler.h"
+#include "mtl_sync.h"
+#include "mtl_texture.h"
+
+mtl_texture *mtl_drawable_get_texture(void *drawable_ptr);
+
+void *mtl_retain(void *handle);
+void mtl_release(void *handle);
+
+#endif /* KK_BRIDGE_H */
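
A minimal usage sketch of the ownership convention described in the header comment above. The caller below is hypothetical and not part of this series; it only exercises functions declared in these bridge headers, and error handling is omitted:

   #include "mtl_bridge.h"

   static void
   bridge_usage_sketch(void)
   {
      /* Everything coming from a mtl_new_* / *_create wrapper is a retained
       * reference owned by the caller. */
      mtl_device *dev = mtl_device_create();
      mtl_command_queue *queue = mtl_new_command_queue(dev, 64);
      mtl_command_buffer *cmd = mtl_new_command_buffer(queue);

      /* ... encode work into cmd here ... */

      mtl_command_buffer_commit(cmd);

      /* Pair every "new"/"create" with mtl_release(). */
      mtl_release(cmd);
      mtl_release(queue);
      mtl_release(dev);
   }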
diff --git a/src/kosmickrisp/bridge/mtl_bridge.m b/src/kosmickrisp/bridge/mtl_bridge.m
new file mode 100644
index 00000000000..a5ef514f630
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_bridge.m
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_bridge.h"
+
+// kk_image_layout.h should also include "vulkan/vulkan.h", but include it here just to be safe
+#include "vulkan/vulkan.h"
+#include "kk_image_layout.h"
+
+#include "util/macros.h"
+
+#include <Metal/MTLCommandBuffer.h>
+#include <Metal/MTLCommandQueue.h>
+#include <Metal/MTLDevice.h>
+#include <Metal/MTLHeap.h>
+#include <Metal/MTLEvent.h>
+
+#include <QuartzCore/CAMetalLayer.h>
+
+static_assert(sizeof(MTLResourceID) == sizeof(uint64_t), "Must match, otherwise descriptors are broken");
+
+mtl_texture *
+mtl_drawable_get_texture(void *drawable_ptr)
+{
+ @autoreleasepool {
+ id<CAMetalDrawable> drawable = (id<CAMetalDrawable>)drawable_ptr;
+ return drawable.texture;
+ }
+}
+
+void *
+mtl_retain(void *handle)
+{
+ @autoreleasepool {
+ NSObject *obj = (NSObject *)handle;
+ return [obj retain];
+ }
+}
+
+void
+mtl_release(void *handle)
+{
+ @autoreleasepool {
+ NSObject *obj = (NSObject *)handle;
+ [obj release];
+ }
+}
diff --git a/src/kosmickrisp/bridge/mtl_buffer.h b/src/kosmickrisp/bridge/mtl_buffer.h
new file mode 100644
index 00000000000..fc8cd21e5fa
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_buffer.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef MTL_BUFFER_H
+#define MTL_BUFFER_H 1
+
+#include "mtl_types.h"
+
+#include <inttypes.h>
+
+struct kk_image_layout;
+
+/* Utils */
+uint64_t mtl_buffer_get_length(mtl_buffer *buffer);
+uint64_t mtl_buffer_get_gpu_address(mtl_buffer *buffer);
+/* Gets CPU address */
+void *mtl_get_contents(mtl_buffer *buffer);
+
+/* Allocation from buffer */
+mtl_texture *mtl_new_texture_with_descriptor_linear(
+ mtl_buffer *buffer, const struct kk_image_layout *layout, uint64_t offset);
+
+#endif /* MTL_BUFFER_H */
\ No newline at end of file
diff --git a/src/kosmickrisp/bridge/mtl_buffer.m b/src/kosmickrisp/bridge/mtl_buffer.m
new file mode 100644
index 00000000000..dedb309e7fd
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_buffer.m
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_buffer.h"
+
+/* TODO_KOSMICKRISP Remove */
+#include "kk_image_layout.h"
+
+#include <Metal/MTLBuffer.h>
+#include <Metal/MTLTexture.h>
+
+uint64_t
+mtl_buffer_get_length(mtl_buffer *buffer)
+{
+ @autoreleasepool {
+ id<MTLBuffer> buf = (id<MTLBuffer>)buffer;
+ return buf.length;
+ }
+}
+
+uint64_t
+mtl_buffer_get_gpu_address(mtl_buffer *buffer)
+{
+ @autoreleasepool {
+ id<MTLBuffer> buf = (id<MTLBuffer>)buffer;
+ return [buf gpuAddress];
+ }
+}
+
+void *
+mtl_get_contents(mtl_buffer *buffer)
+{
+ @autoreleasepool {
+ id<MTLBuffer> buf = (id<MTLBuffer>)buffer;
+ return [buf contents];
+ }
+}
+
+/* TODO_KOSMICKRISP This is a duplicate, but both should be removed once we move kk_image_layout to the bridge. */
+static MTLTextureDescriptor *
+mtl_new_texture_descriptor(const struct kk_image_layout *layout)
+{
+ @autoreleasepool {
+ MTLTextureDescriptor *descriptor = [MTLTextureDescriptor new];
+ descriptor.textureType = (MTLTextureType)layout->type;
+ descriptor.pixelFormat = layout->format.mtl;
+ descriptor.width = layout->width_px;
+ descriptor.height = layout->height_px;
+ descriptor.depth = layout->depth_px;
+ descriptor.mipmapLevelCount = layout->levels;
+ descriptor.sampleCount = layout->sample_count_sa;
+ descriptor.arrayLength = layout->layers;
+ descriptor.allowGPUOptimizedContents = layout->optimized_layout;
+ descriptor.usage = (MTLTextureUsage)layout->usage;
+ /* We don't set the swizzle because Metal complains when a swizzled texture has store or render-target usage... */
+
+ return descriptor;
+ }
+}
+
+mtl_texture *
+mtl_new_texture_with_descriptor_linear(mtl_buffer *buffer,
+ const struct kk_image_layout *layout,
+ uint64_t offset)
+{
+ @autoreleasepool {
+ id<MTLBuffer> buf = (id<MTLBuffer>)buffer;
+ MTLTextureDescriptor *descriptor = [mtl_new_texture_descriptor(layout) autorelease];
+ descriptor.resourceOptions = buf.resourceOptions;
+ id<MTLTexture> texture = [buf newTextureWithDescriptor:descriptor offset:offset bytesPerRow:layout->linear_stride_B];
+
+ return texture;
+ }
+}
+
diff --git a/src/kosmickrisp/bridge/mtl_command_buffer.h b/src/kosmickrisp/bridge/mtl_command_buffer.h
new file mode 100644
index 00000000000..6567c0a278e
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_command_buffer.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef MTL_COMMAND_BUFFER_H
+#define MTL_COMMAND_BUFFER_H 1
+
+#include "mtl_types.h"
+
+#include <stdint.h>
+
+void mtl_encode_signal_event(mtl_command_buffer *cmd_buf_handle,
+ mtl_event *event_handle, uint64_t value);
+
+void mtl_encode_wait_for_event(mtl_command_buffer *cmd_buf_handle,
+ mtl_event *event_handle, uint64_t value);
+
+void mtl_add_completed_handler(mtl_command_buffer *cmd,
+ void (*callback)(void *data), void *data);
+
+void mtl_command_buffer_commit(mtl_command_buffer *cmd_buf);
+
+void mtl_present_drawable(mtl_command_buffer *cmd_buf, void *drawable);
+
+#endif /* MTL_COMMAND_BUFFER_H */
diff --git a/src/kosmickrisp/bridge/mtl_command_buffer.m b/src/kosmickrisp/bridge/mtl_command_buffer.m
new file mode 100644
index 00000000000..3086bd293c2
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_command_buffer.m
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_command_buffer.h"
+
+#include <Metal/MTLCommandBuffer.h>
+#include <QuartzCore/CAMetalLayer.h>
+
+void
+mtl_encode_signal_event(mtl_command_buffer *cmd_buf_handle,
+ mtl_event *event_handle, uint64_t value)
+{
+ @autoreleasepool {
+ id<MTLCommandBuffer> cmd_buf = (id<MTLCommandBuffer>)cmd_buf_handle;
+ id<MTLEvent> event = (id<MTLEvent>)event_handle;
+ [cmd_buf encodeSignalEvent:event value:value];
+ }
+}
+
+void
+mtl_encode_wait_for_event(mtl_command_buffer *cmd_buf_handle,
+ mtl_event *event_handle, uint64_t value)
+{
+ @autoreleasepool {
+ id<MTLCommandBuffer> cmd_buf = (id<MTLCommandBuffer>)cmd_buf_handle;
+ id<MTLEvent> event = (id<MTLEvent>)event_handle;
+ [cmd_buf encodeWaitForEvent:event value:value];
+ }
+}
+
+void
+mtl_add_completed_handler(mtl_command_buffer *cmd, void (*callback)(void *data),
+ void *data)
+{
+ @autoreleasepool {
+ id<MTLCommandBuffer> mtl_cmd = (id<MTLCommandBuffer>)cmd;
+ [mtl_cmd addCompletedHandler:^(id<MTLCommandBuffer> _Nonnull cmd_buf) {
+ if (callback)
+ callback(data);
+ }];
+ }
+}
+
+void
+mtl_command_buffer_commit(mtl_command_buffer *cmd_buffer)
+{
+ @autoreleasepool {
+ id<MTLCommandBuffer> cmd_buf = (id<MTLCommandBuffer>)cmd_buffer;
+ [cmd_buf commit];
+ }
+}
+
+void
+mtl_present_drawable(mtl_command_buffer *cmd_buf, void *drawable_ptr)
+{
+ @autoreleasepool {
+ id<MTLCommandBuffer> cmd = (id<MTLCommandBuffer>)cmd_buf;
+ id<CAMetalDrawable> drawable = [(id<CAMetalDrawable>)drawable_ptr autorelease];
+ [cmd presentDrawable:drawable];
+ }
+}
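
A sketch of how mtl_add_completed_handler() might be consumed. struct submit_ctx and the flag it sets are hypothetical; only the two bridge calls are real. Note that the handler runs on a Metal-owned completion thread and has to be registered before the command buffer is committed:

   struct submit_ctx {
      int done; /* hypothetical completion flag polled elsewhere */
   };

   static void
   on_cmd_buffer_done(void *data)
   {
      struct submit_ctx *ctx = data;
      ctx->done = 1; /* runs on Metal's completion thread, not the submitter */
   }

   static void
   submit_with_callback(mtl_command_buffer *cmd, struct submit_ctx *ctx)
   {
      /* Handlers must be added before committing the command buffer. */
      mtl_add_completed_handler(cmd, on_cmd_buffer_done, ctx);
      mtl_command_buffer_commit(cmd);
   }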
diff --git a/src/kosmickrisp/bridge/mtl_command_queue.h b/src/kosmickrisp/bridge/mtl_command_queue.h
new file mode 100644
index 00000000000..8cc0149d3a6
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_command_queue.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef MTL_COMMAND_QUEUE_H
+#define MTL_COMMAND_QUEUE_H 1
+
+#include "mtl_types.h"
+
+#include <stdint.h>
+
+mtl_command_queue *mtl_new_command_queue(mtl_device *device,
+ uint32_t cmd_buffer_count);
+
+mtl_command_buffer *mtl_new_command_buffer(mtl_command_queue *cmd_queue);
+
+#endif /* MTL_COMMAND_QUEUE_H */
\ No newline at end of file
diff --git a/src/kosmickrisp/bridge/mtl_command_queue.m b/src/kosmickrisp/bridge/mtl_command_queue.m
new file mode 100644
index 00000000000..7f36cc69d9c
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_command_queue.m
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_command_queue.h"
+
+#include <Metal/MTLDevice.h>
+#include <Metal/MTLCommandQueue.h>
+
+mtl_command_queue *
+mtl_new_command_queue(mtl_device *device, uint32_t cmd_buffer_count)
+{
+ @autoreleasepool {
+ id<MTLDevice> dev = (id<MTLDevice>)device;
+ return [dev newCommandQueueWithMaxCommandBufferCount:cmd_buffer_count];
+ }
+}
+
+mtl_command_buffer *
+mtl_new_command_buffer(mtl_command_queue *cmd_queue)
+{
+ @autoreleasepool {
+ id<MTLCommandQueue> queue = (id<MTLCommandQueue>)cmd_queue;
+ return [[queue commandBuffer] retain];
+ }
+}
diff --git a/src/kosmickrisp/bridge/mtl_compute_state.h b/src/kosmickrisp/bridge/mtl_compute_state.h
new file mode 100644
index 00000000000..f47a9e7e9c5
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_compute_state.h
@@ -0,0 +1,13 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_types.h"
+
+#include <stdint.h>
+
+mtl_compute_pipeline_state *
+mtl_new_compute_pipeline_state(mtl_device *device, mtl_function *function,
+ uint64_t max_total_threads_per_threadgroup);
\ No newline at end of file
diff --git a/src/kosmickrisp/bridge/mtl_compute_state.m b/src/kosmickrisp/bridge/mtl_compute_state.m
new file mode 100644
index 00000000000..f6ebce751b5
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_compute_state.m
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_compute_state.h"
+
+#include <Metal/MTLComputePipeline.h>
+
+mtl_compute_pipeline_state *
+mtl_new_compute_pipeline_state(mtl_device *device, mtl_function *function,
+ uint64_t max_total_threads_per_threadgroup)
+{
+ @autoreleasepool {
+ id<MTLDevice> dev = (id<MTLDevice>)device;
+ id<MTLComputePipelineState> pipeline = NULL;
+
+ MTLComputePipelineDescriptor *comp_desc = [[[MTLComputePipelineDescriptor alloc] init] autorelease];
+ NSError *error;
+ comp_desc.computeFunction = (id<MTLFunction>)function;
+ comp_desc.maxTotalThreadsPerThreadgroup = max_total_threads_per_threadgroup;
+ pipeline = [dev newComputePipelineStateWithDescriptor:comp_desc options:0 reflection:nil error:&error];
+
+ /* TODO_KOSMICKRISP Error checking */
+
+ return pipeline;
+ }
+}
diff --git a/src/kosmickrisp/bridge/mtl_device.h b/src/kosmickrisp/bridge/mtl_device.h
new file mode 100644
index 00000000000..30ee1b0967e
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_device.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef MTL_DEVICE_H
+#define MTL_DEVICE_H 1
+
+#include "mtl_types.h"
+
+#include <stdint.h>
+
+/* TODO_KOSMICKRISP Remove */
+struct kk_image_layout;
+
+/* Device creation */
+mtl_device *mtl_device_create(void);
+
+/* Device operations */
+void mtl_start_gpu_capture(mtl_device *mtl_dev_handle);
+void mtl_stop_gpu_capture(void);
+
+/* Device feature query */
+void mtl_device_get_name(mtl_device *dev, char buffer[256]);
+void mtl_device_get_architecture_name(mtl_device *dev, char buffer[256]);
+uint64_t mtl_device_get_peer_group_id(mtl_device *dev);
+uint32_t mtl_device_get_peer_index(mtl_device *dev);
+uint64_t mtl_device_get_registry_id(mtl_device *dev);
+struct mtl_size mtl_device_max_threads_per_threadgroup(mtl_device *dev);
+
+/* Resource queries */
+void mtl_heap_buffer_size_and_align_with_length(mtl_device *device,
+ uint64_t *size_B,
+ uint64_t *align_B);
+void
+mtl_heap_texture_size_and_align_with_descriptor(mtl_device *device,
+ struct kk_image_layout *layout);
+
+#endif /* MTL_DEVICE_H */
\ No newline at end of file
diff --git a/src/kosmickrisp/bridge/mtl_device.m b/src/kosmickrisp/bridge/mtl_device.m
new file mode 100644
index 00000000000..aab0e735ff6
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_device.m
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_device.h"
+
+/* TODO_KOSMICKRISP Remove */
+#include "kk_image_layout.h"
+#include "kk_private.h"
+
+#include <Metal/MTLDevice.h>
+#include <Metal/MTLCaptureManager.h>
+
+/* Device creation */
+mtl_device *
+mtl_device_create()
+{
+ mtl_device *device = 0u;
+
+ @autoreleasepool {
+ NSArray<id<MTLDevice>> *devs = MTLCopyAllDevices();
+ uint32_t device_count = [devs count];
+
+ for (uint32_t i = 0u; i < device_count; ++i) {
+ if (@available(macOS 10.15, *)) {
+ if (!device && [devs[i] supportsFamily:MTLGPUFamilyMetal3]) {
+ device = (mtl_device *)[devs[i] retain];
+ }
+ [devs[i] autorelease];
+ }
+ }
+
+ return device;
+ }
+}
+
+/* Device operations */
+void
+mtl_start_gpu_capture(mtl_device *mtl_dev_handle)
+{
+ @autoreleasepool {
+ id<MTLDevice> mtl_dev = (id<MTLDevice>)mtl_dev_handle;
+ MTLCaptureManager *captureMgr = [MTLCaptureManager sharedCaptureManager];
+
+ // Before macOS 10.15 and iOS 13.0, captureDesc will just be nil
+ MTLCaptureDescriptor *captureDesc = [[MTLCaptureDescriptor new] autorelease];
+ captureDesc.captureObject = mtl_dev;
+ captureDesc.destination = MTLCaptureDestinationDeveloperTools;
+
+ // TODO_KOSMICKRISP Support dumping a trace to a file?
+ // NSString *tmp_dir = NSTemporaryDirectory();
+ // NSString *pname = [[NSProcessInfo processInfo] processName];
+ // NSString *capture_path = [NSString stringWithFormat:@"%@/%@.gputrace", tmp_dir, pname];
+ // if ([captureMgr supportsDestination: MTLCaptureDestinationGPUTraceDocument] ) {
+ // captureDesc.destination = MTLCaptureDestinationGPUTraceDocument;
+ // captureDesc.outputURL = [NSURL fileURLWithPath: capture_path];
+ //}
+
+ NSError *err = nil;
+ if (![captureMgr startCaptureWithDescriptor:captureDesc error:&err]) {
+ // fprintf(stderr, "Failed to automatically start GPU capture session (Error code %li) using startCaptureWithDescriptor: %s\n",
+ // (long)err.code, err.localizedDescription.UTF8String);
+ // fprintf(stderr, "Using startCaptureWithDevice\n");
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+ [captureMgr startCaptureWithDevice:mtl_dev];
+#pragma clang diagnostic pop
+ }
+
+ //[tmp_dir release];
+ //[pname release];
+ //[capture_path release];
+ }
+}
+
+void
+mtl_stop_gpu_capture()
+{
+ @autoreleasepool {
+ [[MTLCaptureManager sharedCaptureManager] stopCapture];
+ }
+}
+
+/* Device feature query */
+void
+mtl_device_get_name(mtl_device *dev, char buffer[256])
+{
+ @autoreleasepool {
+ id<MTLDevice> device = (id<MTLDevice>)dev;
+ [device.name getCString:buffer maxLength:(sizeof(char) * 256) encoding:NSUTF8StringEncoding];
+ }
+}
+
+void
+mtl_device_get_architecture_name(mtl_device *dev, char buffer[256])
+{
+ @autoreleasepool {
+ id<MTLDevice> device = (id<MTLDevice>)dev;
+ [device.architecture.name getCString:buffer maxLength:(sizeof(char) * 256) encoding:NSUTF8StringEncoding];
+ }
+}
+
+uint64_t
+mtl_device_get_peer_group_id(mtl_device *dev)
+{
+ @autoreleasepool {
+ id<MTLDevice> device = (id<MTLDevice>)dev;
+ return device.peerGroupID;
+ }
+}
+
+uint32_t
+mtl_device_get_peer_index(mtl_device *dev)
+{
+ @autoreleasepool {
+ id<MTLDevice> device = (id<MTLDevice>)dev;
+ return device.peerIndex;
+ }
+}
+
+uint64_t
+mtl_device_get_registry_id(mtl_device *dev)
+{
+ @autoreleasepool {
+ id<MTLDevice> device = (id<MTLDevice>)dev;
+ return device.registryID;
+ }
+}
+
+struct mtl_size
+mtl_device_max_threads_per_threadgroup(mtl_device *dev)
+{
+ @autoreleasepool {
+ id<MTLDevice> device = (id<MTLDevice>)dev;
+ return (struct mtl_size){.x = device.maxThreadsPerThreadgroup.width,
+ .y = device.maxThreadsPerThreadgroup.height,
+ .z = device.maxThreadsPerThreadgroup.depth};
+ }
+}
+
+/* Resource queries */
+/* TODO_KOSMICKRISP Return a struct */
+void
+mtl_heap_buffer_size_and_align_with_length(mtl_device *device, uint64_t *size_B,
+ uint64_t *align_B)
+{
+ @autoreleasepool {
+ id<MTLDevice> dev = (id<MTLDevice>)device;
+ MTLSizeAndAlign size_align = [dev heapBufferSizeAndAlignWithLength:*size_B options:KK_MTL_RESOURCE_OPTIONS];
+ *size_B = size_align.size;
+ *align_B = size_align.align;
+ }
+}
+
+/* TODO_KOSMICKRISP Remove */
+static MTLTextureDescriptor *
+mtl_new_texture_descriptor(const struct kk_image_layout *layout)
+{
+ @autoreleasepool {
+ MTLTextureDescriptor *descriptor = [MTLTextureDescriptor new];
+ descriptor.textureType = (MTLTextureType)layout->type;
+ descriptor.pixelFormat = layout->format.mtl;
+ descriptor.width = layout->width_px;
+ descriptor.height = layout->height_px;
+ descriptor.depth = layout->depth_px;
+ descriptor.mipmapLevelCount = layout->levels;
+ descriptor.sampleCount = layout->sample_count_sa;
+ descriptor.arrayLength = layout->layers;
+ descriptor.allowGPUOptimizedContents = layout->optimized_layout;
+ descriptor.usage = (MTLTextureUsage)layout->usage;
+ /* We don't set the swizzle because Metal complains when a swizzled texture has store or render-target usage... */
+
+ return descriptor;
+ }
+}
+
+void
+mtl_heap_texture_size_and_align_with_descriptor(mtl_device *device,
+ struct kk_image_layout *layout)
+{
+ @autoreleasepool {
+ id<MTLDevice> dev = (id<MTLDevice>)device;
+ if (layout->optimized_layout) {
+ MTLTextureDescriptor *descriptor = [mtl_new_texture_descriptor(layout) autorelease];
+ descriptor.resourceOptions = KK_MTL_RESOURCE_OPTIONS;
+ MTLSizeAndAlign size_align = [dev heapTextureSizeAndAlignWithDescriptor:descriptor];
+ layout->size_B = size_align.size;
+ layout->align_B = size_align.align;
+ } else {
+ /* Linear textures have different alignment since they are allocated on top of MTLBuffers */
+ layout->align_B = [dev minimumLinearTextureAlignmentForPixelFormat:layout->format.mtl];
+ }
+ }
+}
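
The in/out convention of mtl_heap_buffer_size_and_align_with_length() above is easy to miss: the caller passes the requested buffer length in *size_B and gets back the size and alignment the heap allocation actually needs. A sketch, where dev and len_B are assumed to exist in the caller:

   uint64_t size_B = len_B; /* requested buffer length (input) */
   uint64_t align_B = 0;
   mtl_heap_buffer_size_and_align_with_length(dev, &size_B, &align_B);
   /* size_B/align_B now describe what the backing heap allocation requires. */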
diff --git a/src/kosmickrisp/bridge/mtl_encoder.h b/src/kosmickrisp/bridge/mtl_encoder.h
new file mode 100644
index 00000000000..bfd582d571e
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_encoder.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef MTL_ENCODER_H
+#define MTL_ENCODER_H 1
+
+#include "mtl_types.h"
+
+#include <stdint.h>
+
+/* Common encoder utils */
+void mtl_end_encoding(void *encoder);
+
+/* MTLBlitEncoder */
+mtl_blit_encoder *mtl_new_blit_command_encoder(mtl_command_buffer *cmd_buffer);
+
+void mtl_blit_update_fence(mtl_blit_encoder *encoder, mtl_fence *fence);
+void mtl_blit_wait_for_fence(mtl_blit_encoder *encoder, mtl_fence *fence);
+
+void mtl_copy_from_buffer_to_buffer(mtl_blit_encoder *blit_enc_handle,
+ mtl_buffer *src_buf, size_t src_offset,
+ mtl_buffer *dst_buf, size_t dst_offset,
+ size_t size);
+
+void mtl_copy_from_buffer_to_texture(mtl_blit_encoder *blit_enc_handle,
+ struct mtl_buffer_image_copy *data);
+
+void mtl_copy_from_texture_to_buffer(mtl_blit_encoder *blit_enc_handle,
+ struct mtl_buffer_image_copy *data);
+
+void mtl_copy_from_texture_to_texture(
+ mtl_blit_encoder *blit_enc_handle, mtl_texture *src_tex_handle,
+ size_t src_slice, size_t src_level, struct mtl_origin src_origin,
+ struct mtl_size src_size, mtl_texture *dst_tex_handle, size_t dst_slice,
+ size_t dst_level, struct mtl_origin dst_origin);
+
+/* MTLComputeEncoder */
+mtl_compute_encoder *
+mtl_new_compute_command_encoder(mtl_command_buffer *cmd_buffer);
+
+void mtl_compute_update_fence(mtl_compute_encoder *encoder, mtl_fence *fence);
+void mtl_compute_wait_for_fence(mtl_compute_encoder *encoder, mtl_fence *fence);
+
+void mtl_compute_set_pipeline_state(mtl_compute_encoder *encoder,
+ mtl_compute_pipeline_state *state_handle);
+
+void mtl_compute_set_buffer(mtl_compute_encoder *encoder, mtl_buffer *buffer,
+ size_t offset, size_t index);
+
+void mtl_compute_use_resource(mtl_compute_encoder *encoder,
+ mtl_resource *res_handle, uint32_t usage);
+
+void mtl_compute_use_resources(mtl_compute_encoder *encoder,
+ mtl_resource **resource_handles, uint32_t count,
+ enum mtl_resource_usage usage);
+
+void mtl_compute_use_heaps(mtl_compute_encoder *encoder, mtl_heap **heaps,
+ uint32_t count);
+
+void mtl_dispatch_threads(mtl_compute_encoder *encoder,
+ struct mtl_size grid_size,
+ struct mtl_size local_size);
+
+void mtl_dispatch_threadgroups_with_indirect_buffer(
+ mtl_compute_encoder *encoder, mtl_buffer *buffer, uint32_t offset,
+ struct mtl_size local_size);
+
+/* MTLRenderEncoder */
+mtl_render_encoder *mtl_new_render_command_encoder_with_descriptor(
+ mtl_command_buffer *command_buffer, mtl_render_pass_descriptor *descriptor);
+
+void mtl_render_update_fence(mtl_render_encoder *encoder, mtl_fence *fence);
+void mtl_render_wait_for_fence(mtl_render_encoder *encoder, mtl_fence *fence);
+
+void mtl_set_viewports(mtl_render_encoder *encoder,
+ struct mtl_viewport *viewports, uint32_t count);
+
+void mtl_set_scissor_rects(mtl_render_encoder *encoder,
+ struct mtl_scissor_rect *scissor_rects,
+ uint32_t count);
+
+void mtl_render_set_pipeline_state(mtl_render_encoder *encoder,
+ mtl_render_pipeline_state *pipeline);
+
+void mtl_set_depth_stencil_state(mtl_render_encoder *encoder,
+ mtl_depth_stencil_state *state);
+
+void mtl_set_stencil_references(mtl_render_encoder *encoder, uint32_t front,
+ uint32_t back);
+
+void mtl_set_front_face_winding(mtl_render_encoder *encoder,
+ enum mtl_winding winding);
+
+void mtl_set_cull_mode(mtl_render_encoder *encoder, enum mtl_cull_mode mode);
+
+void mtl_set_visibility_result_mode(mtl_render_encoder *encoder,
+ enum mtl_visibility_result_mode mode,
+ size_t offset);
+
+void mtl_set_depth_bias(mtl_render_encoder *encoder, float depth_bias,
+ float slope_scale, float clamp);
+
+void mtl_set_depth_clip_mode(mtl_render_encoder *encoder,
+ enum mtl_depth_clip_mode mode);
+
+void mtl_set_vertex_amplification_count(mtl_render_encoder *encoder,
+ uint32_t *layer_ids, uint32_t id_count);
+
+void mtl_set_vertex_buffer(mtl_render_encoder *encoder, mtl_buffer *buffer,
+ uint32_t offset, uint32_t index);
+
+void mtl_set_fragment_buffer(mtl_render_encoder *encoder, mtl_buffer *buffer,
+ uint32_t offset, uint32_t index);
+
+void mtl_draw_primitives(mtl_render_encoder *encoder,
+ enum mtl_primitive_type primitive_type,
+ uint32_t vertexStart, uint32_t vertexCount,
+ uint32_t instanceCount, uint32_t baseInstance);
+
+void mtl_draw_indexed_primitives(
+ mtl_render_encoder *encoder, enum mtl_primitive_type primitive_type,
+ uint32_t index_count, enum mtl_index_type index_type,
+ mtl_buffer *index_buffer, uint32_t index_buffer_offset,
+ uint32_t instance_count, int32_t base_vertex, uint32_t base_instance);
+
+void mtl_draw_primitives_indirect(mtl_render_encoder *encoder,
+ enum mtl_primitive_type primitive_type,
+ mtl_buffer *indirect_buffer,
+ uint64_t indirect_buffer_offset);
+
+void mtl_draw_indexed_primitives_indirect(mtl_render_encoder *encoder,
+ enum mtl_primitive_type primitive_type,
+ enum mtl_index_type index_type,
+ mtl_buffer *index_buffer,
+ uint32_t index_buffer_offset,
+ mtl_buffer *indirect_buffer,
+ uint64_t indirect_buffer_offset);
+
+void mtl_render_use_resource(mtl_compute_encoder *encoder,
+ mtl_resource *res_handle, uint32_t usage);
+
+void mtl_render_use_resources(mtl_render_encoder *encoder,
+ mtl_resource **resource_handles, uint32_t count,
+ enum mtl_resource_usage usage);
+
+void mtl_render_use_heaps(mtl_render_encoder *encoder, mtl_heap **heaps,
+ uint32_t count);
+
+#endif /* MTL_ENCODER_H */
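
A sketch of the render-encoder lifecycle these declarations imply. The helper is hypothetical: the pass descriptor, pipeline state and primitive type are created through the mtl_render_state wrappers elsewhere in this series, and the viewport/scissor values are placeholders:

   static void
   encode_render_pass_sketch(mtl_command_buffer *cmd,
                             mtl_render_pass_descriptor *pass,
                             mtl_render_pipeline_state *pipeline,
                             enum mtl_primitive_type prim)
   {
      mtl_render_encoder *enc =
         mtl_new_render_command_encoder_with_descriptor(cmd, pass);

      mtl_render_set_pipeline_state(enc, pipeline);

      struct mtl_viewport vp = {0};       /* filled from dynamic state */
      struct mtl_scissor_rect rect = {0};
      mtl_set_viewports(enc, &vp, 1);
      mtl_set_scissor_rects(enc, &rect, 1);

      mtl_draw_primitives(enc, prim, 0 /* first vertex */, 3 /* count */,
                          1 /* instances */, 0 /* base instance */);

      mtl_end_encoding(enc);
      mtl_release(enc); /* "new" encoder, so the caller owns a reference */
   }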
diff --git a/src/kosmickrisp/bridge/mtl_encoder.m b/src/kosmickrisp/bridge/mtl_encoder.m
new file mode 100644
index 00000000000..8385ea8ac0b
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_encoder.m
@@ -0,0 +1,537 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_encoder.h"
+
+#include <Metal/MTLBlitCommandEncoder.h>
+#include <Metal/MTLComputeCommandEncoder.h>
+#include <Metal/MTLRenderCommandEncoder.h>
+
+/* Common encoder utils */
+void
+mtl_end_encoding(void *encoder)
+{
+ @autoreleasepool {
+ id<MTLCommandEncoder> enc = (id<MTLCommandEncoder>)encoder;
+ [enc endEncoding];
+ }
+}
+
+/* MTLBlitEncoder */
+mtl_blit_encoder *
+mtl_new_blit_command_encoder(mtl_command_buffer *cmd_buffer)
+{
+ @autoreleasepool {
+ id<MTLCommandBuffer> cmd_buf = (id<MTLCommandBuffer>)cmd_buffer;
+ return [[cmd_buf blitCommandEncoder] retain];
+ }
+}
+
+void
+mtl_blit_update_fence(mtl_blit_encoder *encoder,
+ mtl_fence *fence)
+{
+ @autoreleasepool {
+ id<MTLBlitCommandEncoder> enc = (id<MTLBlitCommandEncoder>)encoder;
+ id<MTLFence> f = (id<MTLFence>)fence;
+ [enc updateFence:f];
+ }
+}
+
+void
+mtl_blit_wait_for_fence(mtl_blit_encoder *encoder,
+ mtl_fence *fence)
+{
+ @autoreleasepool {
+ id<MTLBlitCommandEncoder> enc = (id<MTLBlitCommandEncoder>)encoder;
+ id<MTLFence> f = (id<MTLFence>)fence;
+ [enc waitForFence:f];
+ }
+}
+
+void
+mtl_copy_from_buffer_to_buffer(mtl_blit_encoder *blit_enc_handle,
+ mtl_buffer *src_buf, size_t src_offset,
+ mtl_buffer *dst_buf, size_t dst_offset,
+ size_t size)
+{
+ @autoreleasepool {
+ id<MTLBlitCommandEncoder> blit = (id<MTLBlitCommandEncoder>)blit_enc_handle;
+ id<MTLBuffer> mtl_src_buffer = (id<MTLBuffer>)src_buf;
+ id<MTLBuffer> mtl_dst_buffer = (id<MTLBuffer>)dst_buf;
+ [blit copyFromBuffer:mtl_src_buffer sourceOffset:src_offset toBuffer:mtl_dst_buffer destinationOffset:dst_offset size:size];
+ }
+}
+
+void
+mtl_copy_from_buffer_to_texture(mtl_blit_encoder *blit_enc_handle,
+ struct mtl_buffer_image_copy *data)
+{
+ @autoreleasepool {
+ const MTLSize size = MTLSizeMake(data->image_size.x, data->image_size.y, data->image_size.z);
+ const MTLOrigin origin = MTLOriginMake(data->image_origin.x, data->image_origin.y, data->image_origin.z);
+ id<MTLBlitCommandEncoder> blit = (id<MTLBlitCommandEncoder>)blit_enc_handle;
+ id<MTLBuffer> buffer = (id<MTLBuffer>)data->buffer;
+ id<MTLTexture> image = (id<MTLTexture>)data->image;
+ [blit copyFromBuffer:buffer
+ sourceOffset:data->buffer_offset_B
+ sourceBytesPerRow:data->buffer_stride_B
+ sourceBytesPerImage:data->buffer_2d_image_size_B
+ sourceSize:size
+ toTexture:image
+ destinationSlice:data->image_slice
+ destinationLevel:data->image_level
+ destinationOrigin:origin
+ options:(MTLBlitOption)data->options];
+ }
+}
+
+void
+mtl_copy_from_texture_to_buffer(mtl_blit_encoder *blit_enc_handle,
+ struct mtl_buffer_image_copy *data)
+{
+ @autoreleasepool {
+ const MTLSize size = MTLSizeMake(data->image_size.x, data->image_size.y, data->image_size.z);
+ const MTLOrigin origin = MTLOriginMake(data->image_origin.x, data->image_origin.y, data->image_origin.z);
+ id<MTLBlitCommandEncoder> blit = (id<MTLBlitCommandEncoder>)blit_enc_handle;
+ id<MTLBuffer> buffer = (id<MTLBuffer>)data->buffer;
+ id<MTLTexture> image = (id<MTLTexture>)data->image;
+ [blit copyFromTexture:image
+ sourceSlice:data->image_slice
+ sourceLevel:data->image_level
+ sourceOrigin:origin
+ sourceSize:size
+ toBuffer:buffer
+ destinationOffset:data->buffer_offset_B
+ destinationBytesPerRow:data->buffer_stride_B
+ destinationBytesPerImage:data->buffer_2d_image_size_B
+ options:(MTLBlitOption)data->options];
+ }
+}
+
+void
+mtl_copy_from_texture_to_texture(mtl_blit_encoder *blit_enc_handle,
+ mtl_texture *src_tex_handle, size_t src_slice,
+ size_t src_level, struct mtl_origin src_origin,
+ struct mtl_size src_size,
+ mtl_texture *dst_tex_handle, size_t dst_slice,
+ size_t dst_level, struct mtl_origin dst_origin)
+{
+ @autoreleasepool {
+ MTLOrigin mtl_src_origin = MTLOriginMake(src_origin.x, src_origin.y, src_origin.z);
+ MTLSize mtl_src_size = MTLSizeMake(src_size.x, src_size.y, src_size.z);
+ MTLOrigin mtl_dst_origin = MTLOriginMake(dst_origin.x, dst_origin.y, dst_origin.z);
+ id<MTLTexture> mtl_dst_tex = (id<MTLTexture>)dst_tex_handle;
+ id<MTLBlitCommandEncoder> blit = (id<MTLBlitCommandEncoder>)blit_enc_handle;
+ id<MTLTexture> mtl_src_tex = (id<MTLTexture>)src_tex_handle;
+ [blit copyFromTexture:mtl_src_tex
+ sourceSlice:src_slice
+ sourceLevel:src_level
+ sourceOrigin:mtl_src_origin
+ sourceSize:mtl_src_size
+ toTexture:mtl_dst_tex
+ destinationSlice:dst_slice
+ destinationLevel:dst_level
+ destinationOrigin:mtl_dst_origin];
+ }
+}
+
+/* MTLComputeEncoder */
+mtl_compute_encoder *
+mtl_new_compute_command_encoder(mtl_command_buffer *cmd_buffer)
+{
+ @autoreleasepool {
+ id<MTLCommandBuffer> cmd_buf = (id<MTLCommandBuffer>)cmd_buffer;
+ return [[cmd_buf computeCommandEncoder] retain];
+ }
+}
+
+void
+mtl_compute_update_fence(mtl_compute_encoder *encoder, mtl_fence *fence)
+{
+ @autoreleasepool {
+ id<MTLComputeCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder;
+ id<MTLFence> f = (id<MTLFence>)fence;
+ [enc updateFence:f];
+ }
+}
+
+void
+mtl_compute_wait_for_fence(mtl_compute_encoder *encoder, mtl_fence *fence)
+{
+ @autoreleasepool {
+ id<MTLComputeCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder;
+ id<MTLFence> f = (id<MTLFence>)fence;
+ [enc waitForFence:f];
+ }
+}
+
+void
+mtl_compute_set_pipeline_state(mtl_compute_encoder *encoder,
+ mtl_compute_pipeline_state *state_handle)
+{
+ @autoreleasepool {
+ id<MTLComputeCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder;
+ id<MTLComputePipelineState> state = (id<MTLComputePipelineState>)state_handle;
+ [enc setComputePipelineState:state];
+ }
+}
+
+void
+mtl_compute_set_buffer(mtl_compute_encoder *encoder,
+ mtl_buffer *buffer, size_t offset, size_t index)
+{
+ @autoreleasepool {
+ id<MTLComputeCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder;
+ id<MTLBuffer> buf = (id<MTLBuffer>)buffer;
+ [enc setBuffer:buf offset:offset atIndex:index];
+ }
+}
+
+void
+mtl_compute_use_resource(mtl_compute_encoder *encoder,
+ mtl_resource *res_handle, uint32_t usage)
+{
+ @autoreleasepool {
+ id<MTLComputeCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder;
+ id<MTLResource> res = (id<MTLResource>)res_handle;
+ [enc useResource:res usage:(MTLResourceUsage)usage];
+ }
+}
+
+void
+mtl_compute_use_resources(mtl_compute_encoder *encoder,
+ mtl_resource **resource_handles, uint32_t count,
+ enum mtl_resource_usage usage)
+{
+ @autoreleasepool {
+ id<MTLComputeCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder;
+ id<MTLResource> *handles = (id<MTLResource>*)resource_handles;
+ [enc useResources:handles count:count usage:(MTLResourceUsage)usage];
+ }
+}
+
+void
+mtl_compute_use_heaps(mtl_compute_encoder *encoder, mtl_heap **heaps,
+ uint32_t count)
+{
+ @autoreleasepool {
+ id<MTLComputeCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder;
+ id<MTLHeap> *handles = (id<MTLHeap>*)heaps;
+ [enc useHeaps:handles count:count];
+ }
+}
+
+void
+mtl_dispatch_threads(mtl_compute_encoder *encoder,
+ struct mtl_size grid_size, struct mtl_size local_size)
+{
+ @autoreleasepool {
+ id<MTLComputeCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder;
+ MTLSize thread_count = MTLSizeMake(grid_size.x * local_size.x,
+ grid_size.y * local_size.y,
+ grid_size.z * local_size.z);
+ MTLSize threads_per_threadgroup = MTLSizeMake(local_size.x,
+ local_size.y,
+ local_size.z);
+
+ // TODO_KOSMICKRISP can we rely on nonuniform threadgroup size support?
+ [enc dispatchThreads:thread_count threadsPerThreadgroup:threads_per_threadgroup];
+ }
+}
+
+void
+mtl_dispatch_threadgroups_with_indirect_buffer(mtl_compute_encoder *encoder,
+ mtl_buffer *buffer,
+ uint32_t offset,
+ struct mtl_size local_size)
+{
+ @autoreleasepool {
+ id<MTLComputeCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder;
+ id<MTLBuffer> buf = (id<MTLBuffer>)buffer;
+ MTLSize threads_per_threadgroup = MTLSizeMake(local_size.x,
+ local_size.y,
+ local_size.z);
+
+ [enc dispatchThreadgroupsWithIndirectBuffer:buf indirectBufferOffset:offset threadsPerThreadgroup:threads_per_threadgroup];
+ }
+}
+
+/* MTLRenderEncoder */
+
+/* Encoder commands */
+mtl_render_encoder *
+mtl_new_render_command_encoder_with_descriptor(
+ mtl_command_buffer *command_buffer, mtl_render_pass_descriptor *descriptor)
+{
+ @autoreleasepool {
+ id<MTLCommandBuffer> cmd = (id<MTLCommandBuffer>)command_buffer;
+ MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor;
+ return [[cmd renderCommandEncoderWithDescriptor:desc] retain];
+ }
+}
+
+void
+mtl_render_update_fence(mtl_render_encoder *encoder, mtl_fence *fence)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ id<MTLFence> f = (id<MTLFence>)fence;
+ [enc updateFence:f afterStages:MTLRenderStageFragment];
+ }
+}
+
+void
+mtl_render_wait_for_fence(mtl_render_encoder *encoder, mtl_fence *fence)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ id<MTLFence> f = (id<MTLFence>)fence;
+ [enc waitForFence:f beforeStages:MTLRenderStageVertex];
+ }
+}
+
+void
+mtl_set_viewports(mtl_render_encoder *encoder, struct mtl_viewport *viewports,
+ uint32_t count)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ MTLViewport *vps = (MTLViewport *)viewports;
+ [enc setViewports:vps count:count];
+ }
+}
+
+void
+mtl_set_scissor_rects(mtl_render_encoder *encoder,
+ struct mtl_scissor_rect *scissor_rects, uint32_t count)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ MTLScissorRect *rects = (MTLScissorRect *)scissor_rects;
+ [enc setScissorRects:rects count:count];
+ }
+}
+
+void
+mtl_render_set_pipeline_state(mtl_render_encoder *encoder,
+ mtl_render_pipeline_state *pipeline)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ id<MTLRenderPipelineState> pipe = (id<MTLRenderPipelineState>)pipeline;
+ [enc setRenderPipelineState:pipe];
+ }
+}
+
+void
+mtl_set_depth_stencil_state(mtl_render_encoder *encoder,
+ mtl_depth_stencil_state *state)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ id<MTLDepthStencilState> s = (id<MTLDepthStencilState>)state;
+ [enc setDepthStencilState:s];
+ }
+}
+
+void
+mtl_set_stencil_references(mtl_render_encoder *encoder, uint32_t front,
+ uint32_t back)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ [enc setStencilFrontReferenceValue:front backReferenceValue:back];
+ }
+}
+
+void
+mtl_set_front_face_winding(mtl_render_encoder *encoder,
+ enum mtl_winding winding)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ [enc setFrontFacingWinding:(MTLWinding)winding];
+ }
+}
+
+void
+mtl_set_cull_mode(mtl_render_encoder *encoder, enum mtl_cull_mode mode)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ [enc setCullMode:(MTLCullMode)mode];
+ }
+}
+
+void
+mtl_set_visibility_result_mode(mtl_render_encoder *encoder,
+ enum mtl_visibility_result_mode mode,
+ size_t offset)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ [enc setVisibilityResultMode:(MTLVisibilityResultMode)mode offset:offset];
+ }
+}
+
+void
+mtl_set_depth_bias(mtl_render_encoder *encoder, float depth_bias,
+ float slope_scale, float clamp)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ [enc setDepthBias:depth_bias slopeScale:slope_scale clamp:clamp];
+ }
+}
+
+void
+mtl_set_depth_clip_mode(mtl_render_encoder *encoder,
+ enum mtl_depth_clip_mode mode)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ [enc setDepthClipMode:(MTLDepthClipMode)mode];
+ }
+}
+
+void
+mtl_set_vertex_amplification_count(mtl_render_encoder *encoder,
+ uint32_t *layer_ids, uint32_t id_count)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ MTLVertexAmplificationViewMapping mappings[32];
+ for (uint32_t i = 0u; i < id_count; ++i) {
+ mappings[i].renderTargetArrayIndexOffset = layer_ids[i];
+ mappings[i].viewportArrayIndexOffset = 0u;
+ }
+ [enc setVertexAmplificationCount:id_count viewMappings:mappings];
+ }
+}
+
+void
+mtl_set_vertex_buffer(mtl_render_encoder *encoder, mtl_buffer *buffer,
+ uint32_t offset, uint32_t index)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ id<MTLBuffer> buf = (id<MTLBuffer>)buffer;
+ [enc setVertexBuffer:buf offset:offset atIndex:index];
+ }
+}
+
+void
+mtl_set_fragment_buffer(mtl_render_encoder *encoder, mtl_buffer *buffer,
+ uint32_t offset, uint32_t index)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ id<MTLBuffer> buf = (id<MTLBuffer>)buffer;
+ [enc setFragmentBuffer:buf offset:offset atIndex:index];
+ }
+}
+
+void
+mtl_draw_primitives(mtl_render_encoder *encoder,
+ enum mtl_primitive_type primitive_type, uint32_t vertexStart,
+ uint32_t vertexCount, uint32_t instanceCount,
+ uint32_t baseInstance)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ MTLPrimitiveType type = (MTLPrimitiveType)primitive_type;
+ [enc drawPrimitives:type vertexStart:vertexStart vertexCount:vertexCount instanceCount:instanceCount baseInstance:baseInstance];
+ }
+}
+
+void
+mtl_draw_indexed_primitives(
+ mtl_render_encoder *encoder, enum mtl_primitive_type primitive_type,
+ uint32_t index_count, enum mtl_index_type index_type,
+ mtl_buffer *index_buffer, uint32_t index_buffer_offset,
+ uint32_t instance_count, int32_t base_vertex, uint32_t base_instance)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ id<MTLBuffer> buf = (id<MTLBuffer>)index_buffer;
+ MTLIndexType ndx_type = (MTLIndexType)index_type;
+ MTLPrimitiveType primitive = (MTLPrimitiveType)primitive_type;
+ [enc drawIndexedPrimitives:primitive indexCount:index_count indexType:ndx_type indexBuffer:buf indexBufferOffset:index_buffer_offset instanceCount:instance_count baseVertex:base_vertex baseInstance:base_instance];
+ }
+}
+
+void
+mtl_draw_primitives_indirect(mtl_render_encoder *encoder,
+ enum mtl_primitive_type primitive_type,
+ mtl_buffer *indirect_buffer,
+ uint64_t indirect_buffer_offset)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ id<MTLBuffer> buf = (id<MTLBuffer>)indirect_buffer;
+ MTLPrimitiveType type = (MTLPrimitiveType)primitive_type;
+ [enc drawPrimitives:type indirectBuffer:buf indirectBufferOffset:indirect_buffer_offset];
+ }
+}
+
+void
+mtl_draw_indexed_primitives_indirect(mtl_render_encoder *encoder,
+ enum mtl_primitive_type primitive_type,
+ enum mtl_index_type index_type,
+ mtl_buffer *index_buffer,
+ uint32_t index_buffer_offset,
+ mtl_buffer *indirect_buffer,
+ uint64_t indirect_buffer_offset)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ id<MTLBuffer> buf = (id<MTLBuffer>)indirect_buffer;
+ id<MTLBuffer> ndx_buf = (id<MTLBuffer>)index_buffer;
+ MTLPrimitiveType type = (MTLPrimitiveType)primitive_type;
+ MTLIndexType ndx_type = (MTLIndexType)index_type;
+ [enc drawIndexedPrimitives:type indexType:ndx_type indexBuffer:ndx_buf indexBufferOffset:index_buffer_offset indirectBuffer:buf indirectBufferOffset:indirect_buffer_offset];
+ }
+}
+
+void
+mtl_render_use_resource(mtl_compute_encoder *encoder, mtl_resource *res_handle,
+ uint32_t usage)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ id<MTLResource> res = (id<MTLResource>)res_handle;
+ [enc useResource:res usage:(MTLResourceUsage)usage stages:MTLRenderStageVertex|MTLRenderStageFragment];
+ }
+}
+
+void
+mtl_render_use_resources(mtl_render_encoder *encoder,
+ mtl_resource **resource_handles, uint32_t count,
+ enum mtl_resource_usage usage)
+{
+ @autoreleasepool {
+ // id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ id<MTLResource> *handles = (id<MTLResource>*)resource_handles;
+ for (uint32_t i = 0u; i < count; ++i) {
+ if (handles[i] != NULL)
+ mtl_render_use_resource(encoder, handles[i], usage);
+ }
+ /* TODO_KOSMICKRISP Switch back to useResources once the array is guaranteed to contain no NULL entries; Metal complains about NULLs */
+ // [enc useResources:handles count:count usage:(MTLResourceUsage)usage];
+ }
+}
+
+void
+mtl_render_use_heaps(mtl_render_encoder *encoder, mtl_heap **heaps,
+ uint32_t count)
+{
+ @autoreleasepool {
+ id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder;
+ id<MTLHeap> *handles = (id<MTLHeap>*)heaps;
+ [enc useHeaps:handles count:count stages:MTLRenderStageVertex|MTLRenderStageFragment];
+ }
+}
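
And the equivalent compute-side sequence, again as a hedged sketch: the pipeline and argument buffer come from elsewhere, the grid/local sizes are placeholders, and per the implementation above mtl_dispatch_threads() takes the grid in threadgroups:

   static void
   encode_dispatch_sketch(mtl_command_buffer *cmd,
                          mtl_compute_pipeline_state *pipeline,
                          mtl_buffer *args)
   {
      mtl_compute_encoder *enc = mtl_new_compute_command_encoder(cmd);

      mtl_compute_set_pipeline_state(enc, pipeline);
      mtl_compute_set_buffer(enc, args, 0 /* offset */, 0 /* index */);

      struct mtl_size grid = {.x = 8, .y = 1, .z = 1};   /* threadgroups */
      struct mtl_size local = {.x = 64, .y = 1, .z = 1}; /* threads per group */
      mtl_dispatch_threads(enc, grid, local);

      mtl_end_encoding(enc);
      mtl_release(enc);
   }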
diff --git a/src/kosmickrisp/bridge/mtl_format.h b/src/kosmickrisp/bridge/mtl_format.h
new file mode 100644
index 00000000000..5a2577c5a28
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_format.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef MTL_FORMAT_H
+#define MTL_FORMAT_H 1
+
+/* TODO_KOSMICKRISP Not all entries have been renamed yet.
+ * Naming is slightly modified so it matches enum pipe_format for convenience.
+ */
+enum mtl_pixel_format {
+ MTL_PIXEL_FORMAT_INVALID = 0,
+
+ /* Normal 8 bit formats */
+ MTL_PIXEL_FORMAT_A8_UNORM = 1,
+
+ MTL_PIXEL_FORMAT_R8_UNORM = 10,
+ MTL_PIXEL_FORMAT_R8_SRGB = 11,
+ MTL_PIXEL_FORMAT_R8_SNORM = 12,
+ MTL_PIXEL_FORMAT_R8_UINT = 13,
+ MTL_PIXEL_FORMAT_R8_SINT = 14,
+
+ /* Normal 16 bit formats */
+ MTL_PIXEL_FORMAT_R16_UNORM = 20,
+ MTL_PIXEL_FORMAT_R16_SNORM = 22,
+ MTL_PIXEL_FORMAT_R16_UINT = 23,
+ MTL_PIXEL_FORMAT_R16_SINT = 24,
+ MTL_PIXEL_FORMAT_R16_FLOAT = 25,
+
+ MTL_PIXEL_FORMAT_R8G8_UNORM = 30,
+ MTL_PIXEL_FORMAT_R8G8_SRGB = 31,
+ MTL_PIXEL_FORMAT_R8G8_SNORM = 32,
+ MTL_PIXEL_FORMAT_R8G8_UINT = 33,
+ MTL_PIXEL_FORMAT_R8G8_SINT = 34,
+
+ /* Packed 16 bit formats */
+ MTL_PIXEL_FORMAT_B5G6R5_UNORM = 40,
+ MTL_PIXEL_FORMAT_A1B5G5R5_UNORM = 41,
+ MTL_PIXEL_FORMAT_A4B4G4R4_UNORM = 42,
+ MTL_PIXEL_FORMAT_B5G5R5A1_UNORM = 43,
+
+ /* Normal 32 bit formats */
+ MTL_PIXEL_FORMAT_R32_UINT = 53,
+ MTL_PIXEL_FORMAT_R32_SINT = 54,
+ MTL_PIXEL_FORMAT_R32_FLOAT = 55,
+
+ MTL_PIXEL_FORMAT_R16G16_UNORM = 60,
+ MTL_PIXEL_FORMAT_R16G16_SNORM = 62,
+ MTL_PIXEL_FORMAT_R16G16_UINT = 63,
+ MTL_PIXEL_FORMAT_R16G16_SINT = 64,
+ MTL_PIXEL_FORMAT_R16G16_FLOAT = 65,
+
+ MTL_PIXEL_FORMAT_R8G8B8A8_UNORM = 70,
+ MTL_PIXEL_FORMAT_R8G8B8A8_SRGB = 71,
+ MTL_PIXEL_FORMAT_R8G8B8A8_SNORM = 72,
+ MTL_PIXEL_FORMAT_R8G8B8A8_UINT = 73,
+ MTL_PIXEL_FORMAT_R8G8B8A8_SINT = 74,
+
+ MTL_PIXEL_FORMAT_B8G8R8A8_UNORM = 80,
+ MTL_PIXEL_FORMAT_B8G8R8A8_SRGB = 81,
+
+ /* Packed 32 bit formats */
+ MTL_PIXEL_FORMAT_R10G10B10A2_UNORM = 90,
+ MTL_PIXEL_FORMAT_R10G10B10A2_UINT = 91,
+
+ MTL_PIXEL_FORMAT_R11G11B10_FLOAT = 92,
+ MTL_PIXEL_FORMAT_R9G9B9E5_FLOAT = 93,
+
+ MTL_PIXEL_FORMAT_B10G10R10A2_UNORM = 94,
+
+ MTL_PIXEL_FORMAT_BGR10_XR = 554,
+ MTL_PIXEL_FORMAT_BGR10_XR_SRGB = 555,
+
+ /* Normal 64 bit formats */
+ MTL_PIXEL_FORMAT_R32G32_UINT = 103,
+ MTL_PIXEL_FORMAT_R32G32_SINT = 104,
+ MTL_PIXEL_FORMAT_R32G32_FLOAT = 105,
+
+ MTL_PIXEL_FORMAT_R16G16B16A16_UNORM = 110,
+ MTL_PIXEL_FORMAT_R16G16B16A16_SNORM = 112,
+ MTL_PIXEL_FORMAT_R16G16B16A16_UINT = 113,
+ MTL_PIXEL_FORMAT_R16G16B16A16_SINT = 114,
+ MTL_PIXEL_FORMAT_R16G16B16A16_FLOAT = 115,
+
+ MTL_PIXEL_FORMAT_BGRA10_XR = 552,
+ MTL_PIXEL_FORMAT_BGRA10_XR_SRGB = 553,
+
+ /* Normal 128 bit formats */
+ MTL_PIXEL_FORMAT_R32G32B32A32_UINT = 123,
+ MTL_PIXEL_FORMAT_R32G32B32A32_SINT = 124,
+ MTL_PIXEL_FORMAT_R32G32B32A32_FLOAT = 125,
+
+ /* Compressed formats. */
+
+ /* S3TC/DXT */
+ MTL_PIXEL_FORMAT_BC1_RGBA = 130,
+ MTL_PIXEL_FORMAT_BC1_RGBA_SRGB = 131,
+ MTL_PIXEL_FORMAT_BC2_RGBA = 132,
+ MTL_PIXEL_FORMAT_BC2_RGBA_SRGB = 133,
+ MTL_PIXEL_FORMAT_BC3_RGBA = 134,
+ MTL_PIXEL_FORMAT_BC3_RGBA_SRGB = 135,
+
+ /* RGTC */
+ MTL_PIXEL_FORMAT_BC4_R_UNORM = 140,
+ MTL_PIXEL_FORMAT_BC4_R_SNORM = 141,
+ MTL_PIXEL_FORMAT_BC5_RG_UNORM = 142,
+ MTL_PIXEL_FORMAT_BC5_RG_SNORM = 143,
+
+ /* BPTC */
+ MTL_PIXEL_FORMAT_BC6H_RGB_FLOAT = 150,
+ MTL_PIXEL_FORMAT_BC6H_RGBU_FLOAT = 151,
+ MTL_PIXEL_FORMAT_BC7_RGBA_UNORM = 152,
+ MTL_PIXEL_FORMAT_BC7_RGBA_SRGB = 153,
+
+ /* PVRTC */
+ MTL_PIXEL_FORMAT_PVRTC_RGB_2BPP = 160,
+ MTL_PIXEL_FORMAT_PVRTC_RGB_2BPP_SRGB = 161,
+ MTL_PIXEL_FORMAT_PVRTC_RGB_4BPP = 162,
+ MTL_PIXEL_FORMAT_PVRTC_RGB_4BPP_SRGB = 163,
+ MTL_PIXEL_FORMAT_PVRTC_RGBA_2BPP = 164,
+ MTL_PIXEL_FORMAT_PVRTC_RGBA_2BPP_SRGB = 165,
+ MTL_PIXEL_FORMAT_PVRTC_RGBA_4BPP = 166,
+ MTL_PIXEL_FORMAT_PVRTC_RGBA_4BPP_SRGB = 167,
+
+ /* ETC2 */
+ MTL_PIXEL_FORMAT_ETC2_R11_UNORM = 170,
+ MTL_PIXEL_FORMAT_ETC2_R11_SNORM = 172,
+ MTL_PIXEL_FORMAT_ETC2_RG11_UNORM = 174,
+ MTL_PIXEL_FORMAT_ETC2_RG11_SNORM = 176,
+ MTL_PIXEL_FORMAT_ETC2_RGBA8 = 178,
+ MTL_PIXEL_FORMAT_ETC2_SRGBA8 = 179,
+
+ MTL_PIXEL_FORMAT_ETC2_RGB8 = 180,
+ MTL_PIXEL_FORMAT_ETC2_SRGB8 = 181,
+ MTL_PIXEL_FORMAT_ETC2_RGB8A1 = 182,
+ MTL_PIXEL_FORMAT_ETC2_SRGB8A1 = 183,
+
+ /* ASTC */
+ MTL_PIXEL_FORMAT_ASTC_4x4_SRGB = 186,
+ MTL_PIXEL_FORMAT_ASTC_5x4_SRGB = 187,
+ MTL_PIXEL_FORMAT_ASTC_5x5_SRGB = 188,
+ MTL_PIXEL_FORMAT_ASTC_6x5_SRGB = 189,
+ MTL_PIXEL_FORMAT_ASTC_6x6_SRGB = 190,
+ MTL_PIXEL_FORMAT_ASTC_8x5_SRGB = 192,
+ MTL_PIXEL_FORMAT_ASTC_8x6_SRGB = 193,
+ MTL_PIXEL_FORMAT_ASTC_8x8_SRGB = 194,
+ MTL_PIXEL_FORMAT_ASTC_10x5_SRGB = 195,
+ MTL_PIXEL_FORMAT_ASTC_10x6_SRGB = 196,
+ MTL_PIXEL_FORMAT_ASTC_10x8_SRGB = 197,
+ MTL_PIXEL_FORMAT_ASTC_10x10_SRGB = 198,
+ MTL_PIXEL_FORMAT_ASTC_12x10_SRGB = 199,
+ MTL_PIXEL_FORMAT_ASTC_12x12_SRGB = 200,
+
+ MTL_PIXEL_FORMAT_ASTC_4x4 = 204,
+ MTL_PIXEL_FORMAT_ASTC_5x4 = 205,
+ MTL_PIXEL_FORMAT_ASTC_5x5 = 206,
+ MTL_PIXEL_FORMAT_ASTC_6x5 = 207,
+ MTL_PIXEL_FORMAT_ASTC_6x6 = 208,
+ MTL_PIXEL_FORMAT_ASTC_8x5 = 210,
+ MTL_PIXEL_FORMAT_ASTC_8x6 = 211,
+ MTL_PIXEL_FORMAT_ASTC_8x8 = 212,
+ MTL_PIXEL_FORMAT_ASTC_10x5 = 213,
+ MTL_PIXEL_FORMAT_ASTC_10x6 = 214,
+ MTL_PIXEL_FORMAT_ASTC_10x8 = 215,
+ MTL_PIXEL_FORMAT_ASTC_10x10 = 216,
+ MTL_PIXEL_FORMAT_ASTC_12x10 = 217,
+ MTL_PIXEL_FORMAT_ASTC_12x12 = 218,
+
+ /* ASTC HDR (High Dynamic Range) */
+ MTL_PIXEL_FORMAT_ASTC_4x4_HDR = 222,
+ MTL_PIXEL_FORMAT_ASTC_5x4_HDR = 223,
+ MTL_PIXEL_FORMAT_ASTC_5x5_HDR = 224,
+ MTL_PIXEL_FORMAT_ASTC_6x5_HDR = 225,
+ MTL_PIXEL_FORMAT_ASTC_6x6_HDR = 226,
+ MTL_PIXEL_FORMAT_ASTC_8x5_HDR = 228,
+ MTL_PIXEL_FORMAT_ASTC_8x6_HDR = 229,
+ MTL_PIXEL_FORMAT_ASTC_8x8_HDR = 230,
+ MTL_PIXEL_FORMAT_ASTC_10x5_HDR = 231,
+ MTL_PIXEL_FORMAT_ASTC_10x6_HDR = 232,
+ MTL_PIXEL_FORMAT_ASTC_10x8_HDR = 233,
+ MTL_PIXEL_FORMAT_ASTC_10x10_HDR = 234,
+ MTL_PIXEL_FORMAT_ASTC_12x10_HDR = 235,
+ MTL_PIXEL_FORMAT_ASTC_12x12_HDR = 236,
+
+ /* YUV */
+ MTL_PIXEL_FORMAT_GBGR422 = 240,
+ MTL_PIXEL_FORMAT_BGRG422 = 241,
+
+ /* DEPTH */
+ MTL_PIXEL_FORMAT_Z16_UNORM = 250,
+ MTL_PIXEL_FORMAT_Z32_FLOAT = 252,
+
+ /* STENCIL */
+ MTL_PIXEL_FORMAT_S8_UINT = 253,
+
+ /* DEPTH STENCIL */
+ MTL_PIXEL_FORMAT_Z24_UNORM_S8_UINT = 255,
+ MTL_PIXEL_FORMAT_Z32_FLOAT_S8X24_UINT = 260,
+
+ MTL_PIXEL_FORMAT_X32_S8X24_UINT = 261,
+ MTL_PIXEL_FORMAT_X24_S8_UINT = 262,
+};
+
+#endif /* MTL_FORMAT_H */
diff --git a/src/kosmickrisp/bridge/mtl_heap.h b/src/kosmickrisp/bridge/mtl_heap.h
new file mode 100644
index 00000000000..fd34ac4ac5b
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_heap.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef MTL_HEAP_H
+#define MTL_HEAP_H 1
+
+#include "mtl_types.h"
+
+#include <inttypes.h>
+
+/* TODO_KOSMICKRISP We should move this struct to the bridge side. */
+struct kk_image_layout;
+
+/* Creation */
+mtl_heap *mtl_new_heap(mtl_device *device, uint64_t size,
+ enum mtl_resource_options resource_options);
+
+/* Utils */
+uint64_t mtl_heap_get_size(mtl_heap *heap);
+
+/* Allocation from heap */
+mtl_buffer *mtl_new_buffer_with_length(mtl_heap *heap, uint64_t size_B,
+ uint64_t offset_B);
+mtl_texture *mtl_new_texture_with_descriptor(
+ mtl_heap *heap, const struct kk_image_layout *layout, uint64_t offset);
+
+#endif /* MTL_HEAP_H */ \ No newline at end of file
diff --git a/src/kosmickrisp/bridge/mtl_heap.m b/src/kosmickrisp/bridge/mtl_heap.m
new file mode 100644
index 00000000000..c658c0253ec
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_heap.m
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_heap.h"
+
+/* TODO_KOSMICKRISP Remove */
+#include "kk_private.h"
+#include "kk_image_layout.h"
+
+#include <Metal/MTLHeap.h>
+
+/* Creation */
+mtl_heap *
+mtl_new_heap(mtl_device *device, uint64_t size,
+ enum mtl_resource_options resource_options)
+{
+ @autoreleasepool {
+ id<MTLDevice> dev = (id<MTLDevice>)device;
+ MTLHeapDescriptor *descriptor = [[MTLHeapDescriptor new] autorelease];
+ descriptor.type = MTLHeapTypePlacement;
+ descriptor.resourceOptions = (MTLResourceOptions)resource_options;
+ descriptor.size = size;
+ descriptor.sparsePageSize = MTLSparsePageSize16;
+ return [dev newHeapWithDescriptor:descriptor];
+ }
+}
+
+/* Utils */
+uint64_t
+mtl_heap_get_size(mtl_heap *heap)
+{
+ @autoreleasepool {
+ id<MTLHeap> hp = (id<MTLHeap>)heap;
+ return hp.size;
+ }
+}
+
+static MTLTextureDescriptor *
+mtl_new_texture_descriptor(const struct kk_image_layout *layout)
+{
+ @autoreleasepool {
+ MTLTextureDescriptor *descriptor = [MTLTextureDescriptor new];
+ descriptor.textureType = (MTLTextureType)layout->type;
+ descriptor.pixelFormat = layout->format.mtl;
+ descriptor.width = layout->width_px;
+ descriptor.height = layout->height_px;
+ descriptor.depth = layout->depth_px;
+ descriptor.mipmapLevelCount = layout->levels;
+ descriptor.sampleCount = layout->sample_count_sa;
+ descriptor.arrayLength = layout->layers;
+ descriptor.allowGPUOptimizedContents = layout->optimized_layout;
+ descriptor.usage = (MTLTextureUsage)layout->usage;
+      /* Don't set the swizzle: Metal complains if the usage includes shader-write or render-target and a swizzle is set. */
+
+ return descriptor;
+ }
+}
+
+/* Allocation from heap */
+mtl_buffer *
+mtl_new_buffer_with_length(mtl_heap *heap, uint64_t size_B, uint64_t offset_B)
+{
+ @autoreleasepool {
+ id<MTLHeap> hp = (id<MTLHeap>)heap;
+ return (mtl_buffer *)[hp newBufferWithLength:size_B options:KK_MTL_RESOURCE_OPTIONS offset:offset_B];
+ }
+}
+
+mtl_texture *
+mtl_new_texture_with_descriptor(mtl_heap *heap,
+ const struct kk_image_layout *layout,
+ uint64_t offset)
+{
+ @autoreleasepool {
+ id<MTLHeap> hp = (id<MTLHeap>)heap;
+ MTLTextureDescriptor *descriptor = [mtl_new_texture_descriptor(layout) autorelease];
+ descriptor.resourceOptions = hp.resourceOptions;
+ return [hp newTextureWithDescriptor:descriptor offset:offset];
+ }
+}
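
For orientation, a driver-side caller exercises the heap bridge roughly as in the C sketch below. This is an illustrative sketch only, not part of the patch: example_alloc_from_heap, the 64 MiB heap size, and the fixed 4 KiB sub-allocation are placeholders, and a real caller would take sizes and offsets from the alignment queries exposed by mtl_device.h.

   #include "mtl_heap.h"
   #include "mtl_types.h"

   /* Hypothetical helper: create a placement heap and carve one buffer out of it. */
   static mtl_buffer *
   example_alloc_from_heap(mtl_device *dev)
   {
      enum mtl_resource_options opts = MTL_RESOURCE_STORAGE_MODE_SHARED |
                                       MTL_RESOURCE_TRACKING_MODE_UNTRACKED;

      mtl_heap *heap = mtl_new_heap(dev, 64ull * 1024 * 1024, opts);
      if (!heap)
         return NULL;

      /* Placement heaps take an explicit offset for every sub-allocation;
       * offset 0 is only valid here because nothing else lives in the heap. */
      return mtl_new_buffer_with_length(heap, 4096 /* size_B */, 0 /* offset_B */);
   }
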
diff --git a/src/kosmickrisp/bridge/mtl_library.h b/src/kosmickrisp/bridge/mtl_library.h
new file mode 100644
index 00000000000..32bdd8a5529
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_library.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef MTL_LIBRARY_H
+#define MTL_LIBRARY_H 1
+
+#include "mtl_types.h"
+
+mtl_library *mtl_new_library(mtl_device *device, const char *src);
+mtl_function *mtl_new_function_with_name(mtl_library *lib,
+ const char *entry_point);
+
+#endif /* MTL_LIBRARY_H */
diff --git a/src/kosmickrisp/bridge/mtl_library.m b/src/kosmickrisp/bridge/mtl_library.m
new file mode 100644
index 00000000000..a40cea9619b
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_library.m
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_library.h"
+
+#include <Metal/MTLDevice.h>
+
+mtl_library *
+mtl_new_library(mtl_device *device, const char *src)
+{
+ @autoreleasepool {
+ id<MTLDevice> dev = (id<MTLDevice>)device;
+ id<MTLLibrary> lib = NULL;
+ NSString *nsstr = [NSString stringWithCString:src encoding:NSASCIIStringEncoding];
+      NSError *error = nil;
+ MTLCompileOptions *comp_opts = [MTLCompileOptions new];
+ comp_opts.languageVersion = MTLLanguageVersion3_2;
+ comp_opts.mathMode = MTLMathModeSafe;
+ comp_opts.mathFloatingPointFunctions = MTLMathFloatingPointFunctionsPrecise;
+ lib = [dev newLibraryWithSource:nsstr options:comp_opts error:&error];
+
+ if (error != nil) {
+ fprintf(stderr, "Failed to create MTLLibrary: %s\n", [error.localizedDescription UTF8String]);
+ }
+
+ [comp_opts release];
+ return lib;
+ }
+}
+
+mtl_function *
+mtl_new_function_with_name(mtl_library *lib, const char *entry_point)
+{
+ @autoreleasepool {
+ id<MTLLibrary> mtl_lib = (id<MTLLibrary>)lib;
+ NSString *ns_entry_point = [NSString stringWithCString:entry_point encoding:NSASCIIStringEncoding];
+ return [mtl_lib newFunctionWithName:ns_entry_point];
+ }
+}
+
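
As a usage illustration, the MSL produced by the compiler is turned into a callable function through these two entry points roughly as follows. Sketch only: the kernel source and the "main0" entry-point name are placeholders, not anything this patch defines.

   #include "mtl_library.h"

   /* Hypothetical helper: compile a trivial MSL kernel and look up its entry point. */
   static mtl_function *
   example_compile_kernel(mtl_device *dev)
   {
      const char *msl =
         "#include <metal_stdlib>\n"
         "kernel void main0(uint tid [[thread_position_in_grid]]) {}\n";

      mtl_library *lib = mtl_new_library(dev, msl);
      if (!lib)
         return NULL;

      return mtl_new_function_with_name(lib, "main0");
   }
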
diff --git a/src/kosmickrisp/bridge/mtl_render_state.h b/src/kosmickrisp/bridge/mtl_render_state.h
new file mode 100644
index 00000000000..cf13b4678f8
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_render_state.h
@@ -0,0 +1,165 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef MTL_RENDER_STATE_H
+#define MTL_RENDER_STATE_H 1
+
+#include "mtl_types.h"
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+/* Bridge enums */
+enum mtl_pixel_format;
+
+/* TODO_KOSMICKRISP Remove */
+enum VkCompareOp;
+enum VkStencilOp;
+
+/* Render pass descriptor */
+mtl_render_pass_descriptor *mtl_new_render_pass_descriptor(void);
+
+mtl_render_pass_attachment_descriptor *
+mtl_render_pass_descriptor_get_color_attachment(
+ mtl_render_pass_descriptor *descriptor, uint32_t index);
+
+mtl_render_pass_attachment_descriptor *
+mtl_render_pass_descriptor_get_depth_attachment(
+ mtl_render_pass_descriptor *descriptor);
+
+mtl_render_pass_attachment_descriptor *
+mtl_render_pass_descriptor_get_stencil_attachment(
+ mtl_render_pass_descriptor *descriptor);
+
+void mtl_render_pass_attachment_descriptor_set_texture(
+ mtl_render_pass_attachment_descriptor *descriptor, mtl_texture *texture);
+
+void mtl_render_pass_attachment_descriptor_set_level(
+ mtl_render_pass_attachment_descriptor *descriptor, uint32_t level);
+
+void mtl_render_pass_attachment_descriptor_set_slice(
+ mtl_render_pass_attachment_descriptor *descriptor, uint32_t slice);
+
+void mtl_render_pass_attachment_descriptor_set_load_action(
+ mtl_render_pass_attachment_descriptor *descriptor,
+ enum mtl_load_action action);
+
+void mtl_render_pass_attachment_descriptor_set_store_action(
+ mtl_render_pass_attachment_descriptor *descriptor,
+ enum mtl_store_action action);
+
+void mtl_render_pass_attachment_descriptor_set_clear_color(
+ mtl_render_pass_attachment_descriptor *descriptor,
+ struct mtl_clear_color clear_color);
+
+void mtl_render_pass_attachment_descriptor_set_clear_depth(
+ mtl_render_pass_attachment_descriptor *descriptor, double depth);
+
+void mtl_render_pass_attachment_descriptor_set_clear_stencil(
+ mtl_render_pass_attachment_descriptor *descriptor, uint32_t stencil);
+
+void mtl_render_pass_descriptor_set_render_target_array_length(
+ mtl_render_pass_descriptor *descriptor, uint32_t length);
+
+void mtl_render_pass_descriptor_set_render_target_width(
+ mtl_render_pass_descriptor *descriptor, uint32_t width);
+
+void mtl_render_pass_descriptor_set_render_target_height(
+ mtl_render_pass_descriptor *descriptor, uint32_t height);
+
+void mtl_render_pass_descriptor_set_default_raster_sample_count(
+ mtl_render_pass_descriptor *descriptor, uint32_t sample_count);
+
+void mtl_render_pass_descriptor_set_visibility_buffer(
+ mtl_render_pass_descriptor *descriptor, mtl_buffer *visibility_buffer);
+
+/* Render pipeline descriptor */
+mtl_render_pipeline_descriptor *mtl_new_render_pipeline_descriptor(void);
+
+void mtl_render_pipeline_descriptor_set_vertex_shader(
+   mtl_render_pipeline_descriptor *descriptor, mtl_function *shader);
+
+void mtl_render_pipeline_descriptor_set_fragment_shader(
+   mtl_render_pipeline_descriptor *descriptor, mtl_function *shader);
+
+void mtl_render_pipeline_descriptor_set_input_primitive_topology(
+   mtl_render_pipeline_descriptor *descriptor,
+   enum mtl_primitive_topology_class topology_class);
+
+void mtl_render_pipeline_descriptor_set_color_attachment_format(
+   mtl_render_pipeline_descriptor *descriptor, uint8_t index,
+   enum mtl_pixel_format format);
+
+void mtl_render_pipeline_descriptor_set_depth_attachment_format(
+   mtl_render_pipeline_descriptor *descriptor, enum mtl_pixel_format format);
+
+void mtl_render_pipeline_descriptor_set_stencil_attachment_format(
+   mtl_render_pipeline_descriptor *descriptor, enum mtl_pixel_format format);
+
+void mtl_render_pipeline_descriptor_set_raster_sample_count(
+   mtl_render_pipeline_descriptor *descriptor, uint32_t sample_count);
+
+void mtl_render_pipeline_descriptor_set_alpha_to_coverage(
+   mtl_render_pipeline_descriptor *descriptor, bool enabled);
+
+void mtl_render_pipeline_descriptor_set_alpha_to_one(
+   mtl_render_pipeline_descriptor *descriptor, bool enabled);
+
+void mtl_render_pipeline_descriptor_set_rasterization_enabled(
+   mtl_render_pipeline_descriptor *descriptor, bool enabled);
+
+void mtl_render_pipeline_descriptor_set_max_vertex_amplification_count(
+   mtl_render_pipeline_descriptor *descriptor, uint32_t count);
+
+/* Render pipeline */
+mtl_render_pipeline_state *
+mtl_new_render_pipeline(mtl_device *device,
+                        mtl_render_pipeline_descriptor *descriptor);
+
+/* Stencil descriptor */
+mtl_stencil_descriptor *mtl_new_stencil_descriptor(void);
+
+void mtl_stencil_descriptor_set_stencil_failure_operation(
+ mtl_stencil_descriptor *descriptor, enum VkStencilOp op);
+
+void mtl_stencil_descriptor_set_depth_failure_operation(
+ mtl_stencil_descriptor *descriptor, enum VkStencilOp op);
+
+void mtl_stencil_descriptor_set_depth_stencil_pass_operation(
+ mtl_stencil_descriptor *descriptor, enum VkStencilOp op);
+
+void mtl_stencil_descriptor_set_stencil_compare_function(
+ mtl_stencil_descriptor *descriptor, enum VkCompareOp op);
+
+void mtl_stencil_descriptor_set_read_mask(mtl_stencil_descriptor *descriptor,
+ uint32_t mask);
+
+void mtl_stencil_descriptor_set_write_mask(mtl_stencil_descriptor *descriptor,
+ uint32_t mask);
+
+/* Depth stencil descriptor */
+mtl_depth_stencil_descriptor *mtl_new_depth_stencil_descriptor(void);
+
+void mtl_depth_stencil_descriptor_set_depth_compare_function(
+ mtl_depth_stencil_descriptor *descriptor, enum VkCompareOp op);
+
+void mtl_depth_stencil_descriptor_set_depth_write_enabled(
+ mtl_depth_stencil_descriptor *descriptor, bool enable_write);
+
+void mtl_depth_stencil_descriptor_set_back_face_stencil(
+ mtl_depth_stencil_descriptor *descriptor,
+ mtl_stencil_descriptor *stencil_descriptor);
+
+void mtl_depth_stencil_descriptor_set_front_face_stencil(
+ mtl_depth_stencil_descriptor *descriptor,
+ mtl_stencil_descriptor *stencil_descriptor);
+
+/* Depth stencil state */
+mtl_depth_stencil_state *
+mtl_new_depth_stencil_state(mtl_device *device,
+ mtl_depth_stencil_descriptor *descriptor);
+
+#endif /* MTL_RENDER_STATE_H */
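
To make the intended call pattern concrete, a depth/stencil state is assembled from the declarations above roughly as sketched here. Illustrative only: example_make_depth_state and the LESS/KEEP choices are placeholders, and releasing the two descriptors through mtl_release() from mtl_bridge.h is omitted for brevity.

   #include "mtl_render_state.h"
   #include "vulkan/vulkan.h"

   /* Hypothetical helper: depth test LESS with writes, stencil effectively disabled. */
   static mtl_depth_stencil_state *
   example_make_depth_state(mtl_device *dev)
   {
      mtl_stencil_descriptor *stencil = mtl_new_stencil_descriptor();
      mtl_stencil_descriptor_set_stencil_compare_function(stencil, VK_COMPARE_OP_ALWAYS);
      mtl_stencil_descriptor_set_stencil_failure_operation(stencil, VK_STENCIL_OP_KEEP);
      mtl_stencil_descriptor_set_depth_failure_operation(stencil, VK_STENCIL_OP_KEEP);
      mtl_stencil_descriptor_set_depth_stencil_pass_operation(stencil, VK_STENCIL_OP_KEEP);

      mtl_depth_stencil_descriptor *desc = mtl_new_depth_stencil_descriptor();
      mtl_depth_stencil_descriptor_set_depth_compare_function(desc, VK_COMPARE_OP_LESS);
      mtl_depth_stencil_descriptor_set_depth_write_enabled(desc, true);
      /* Metal copies the stencil descriptor on assignment, so reusing it is fine. */
      mtl_depth_stencil_descriptor_set_front_face_stencil(desc, stencil);
      mtl_depth_stencil_descriptor_set_back_face_stencil(desc, stencil);

      return mtl_new_depth_stencil_state(dev, desc);
   }
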
diff --git a/src/kosmickrisp/bridge/mtl_render_state.m b/src/kosmickrisp/bridge/mtl_render_state.m
new file mode 100644
index 00000000000..0a5051c286a
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_render_state.m
@@ -0,0 +1,475 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_render_state.h"
+
+#include "mtl_format.h"
+
+/* TODO_KOSMICKRISP Remove */
+#include "vk_to_mtl_map.h"
+
+/* TODO_KOSMICKRISP Remove */
+#include "vulkan/vulkan.h"
+
+#include <Metal/MTLRenderPass.h>
+#include <Metal/MTLRenderPipeline.h>
+#include <Metal/MTLDepthStencil.h>
+
+/* Render pass descriptor */
+mtl_render_pass_descriptor *
+mtl_new_render_pass_descriptor(void)
+{
+ @autoreleasepool {
+ return [[MTLRenderPassDescriptor renderPassDescriptor] retain];
+ }
+}
+
+mtl_render_pass_attachment_descriptor *
+mtl_render_pass_descriptor_get_color_attachment(
+ mtl_render_pass_descriptor *descriptor, uint32_t index)
+{
+ @autoreleasepool {
+ MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor;
+ return desc.colorAttachments[index];
+ }
+}
+
+mtl_render_pass_attachment_descriptor *
+mtl_render_pass_descriptor_get_depth_attachment(
+ mtl_render_pass_descriptor *descriptor)
+{
+ @autoreleasepool {
+ MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor;
+ return desc.depthAttachment;
+ }
+}
+
+mtl_render_pass_attachment_descriptor *
+mtl_render_pass_descriptor_get_stencil_attachment(
+ mtl_render_pass_descriptor *descriptor)
+{
+ @autoreleasepool {
+ MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor;
+ return desc.stencilAttachment;
+ }
+}
+
+void
+mtl_render_pass_attachment_descriptor_set_texture(
+ mtl_render_pass_attachment_descriptor *descriptor, mtl_texture *texture)
+{
+ @autoreleasepool {
+ MTLRenderPassAttachmentDescriptor *desc = (MTLRenderPassAttachmentDescriptor *)descriptor;
+ desc.texture = (id<MTLTexture>)texture;
+ }
+}
+
+void
+mtl_render_pass_attachment_descriptor_set_level(
+ mtl_render_pass_attachment_descriptor *descriptor, uint32_t level)
+{
+ @autoreleasepool {
+ MTLRenderPassAttachmentDescriptor *desc = (MTLRenderPassAttachmentDescriptor *)descriptor;
+ desc.level = level;
+ }
+}
+
+void
+mtl_render_pass_attachment_descriptor_set_slice(
+ mtl_render_pass_attachment_descriptor *descriptor, uint32_t slice)
+{
+ @autoreleasepool {
+ MTLRenderPassAttachmentDescriptor *desc = (MTLRenderPassAttachmentDescriptor *)descriptor;
+ desc.slice = slice;
+ }
+}
+
+void
+mtl_render_pass_attachment_descriptor_set_load_action(
+ mtl_render_pass_attachment_descriptor *descriptor,
+ enum mtl_load_action action)
+{
+ @autoreleasepool {
+ MTLRenderPassAttachmentDescriptor *desc = (MTLRenderPassAttachmentDescriptor *)descriptor;
+ desc.loadAction = (MTLLoadAction)action;
+ }
+}
+
+void
+mtl_render_pass_attachment_descriptor_set_store_action(
+ mtl_render_pass_attachment_descriptor *descriptor,
+ enum mtl_store_action action)
+{
+ @autoreleasepool {
+ MTLRenderPassAttachmentDescriptor *desc = (MTLRenderPassAttachmentDescriptor *)descriptor;
+ desc.storeAction = (MTLStoreAction)action;
+ desc.storeActionOptions = MTLStoreActionOptionNone; /* TODO_KOSMICKRISP Maybe expose this? */
+ }
+}
+
+void
+mtl_render_pass_attachment_descriptor_set_clear_color(
+ mtl_render_pass_attachment_descriptor *descriptor,
+ struct mtl_clear_color clear_color)
+{
+ @autoreleasepool {
+ MTLRenderPassColorAttachmentDescriptor *desc = (MTLRenderPassColorAttachmentDescriptor *)descriptor;
+ desc.clearColor = MTLClearColorMake(clear_color.red, clear_color.green, clear_color.blue, clear_color.alpha);
+ }
+}
+
+void
+mtl_render_pass_attachment_descriptor_set_clear_depth(
+ mtl_render_pass_attachment_descriptor *descriptor, double depth)
+{
+ @autoreleasepool {
+ MTLRenderPassDepthAttachmentDescriptor *desc = (MTLRenderPassDepthAttachmentDescriptor *)descriptor;
+ desc.clearDepth = depth;
+ }
+}
+
+void
+mtl_render_pass_attachment_descriptor_set_clear_stencil(mtl_render_pass_attachment_descriptor *descriptor,
+ uint32_t stencil)
+{
+ @autoreleasepool {
+ MTLRenderPassStencilAttachmentDescriptor *desc = (MTLRenderPassStencilAttachmentDescriptor *)descriptor;
+ desc.clearStencil = stencil;
+ }
+}
+
+void
+mtl_render_pass_descriptor_set_render_target_array_length(mtl_render_pass_descriptor *descriptor,
+ uint32_t length)
+{
+ @autoreleasepool {
+ MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor;
+ desc.renderTargetArrayLength = length;
+ }
+}
+
+void
+mtl_render_pass_descriptor_set_render_target_width(mtl_render_pass_descriptor *descriptor,
+ uint32_t width)
+{
+ @autoreleasepool {
+ MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor;
+ desc.renderTargetWidth = width;
+ }
+}
+
+void
+mtl_render_pass_descriptor_set_render_target_height(mtl_render_pass_descriptor *descriptor,
+ uint32_t height)
+{
+ @autoreleasepool {
+ MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor;
+ desc.renderTargetHeight = height;
+ }
+}
+
+void
+mtl_render_pass_descriptor_set_default_raster_sample_count(mtl_render_pass_descriptor *descriptor,
+ uint32_t sample_count)
+{
+ @autoreleasepool {
+ MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor;
+ desc.defaultRasterSampleCount = sample_count;
+ }
+}
+
+void
+mtl_render_pass_descriptor_set_visibility_buffer(mtl_render_pass_descriptor *descriptor,
+ mtl_buffer *visibility_buffer)
+{
+ @autoreleasepool {
+ MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor;
+ id<MTLBuffer> buffer = (id<MTLBuffer>)visibility_buffer;
+ desc.visibilityResultBuffer = buffer;
+ }
+}
+
+/* Render pipeline descriptor */
+mtl_render_pipeline_descriptor *
+mtl_new_render_pipeline_descriptor(void)
+{
+ @autoreleasepool {
+ return [[MTLRenderPipelineDescriptor alloc] init];
+ }
+}
+
+void
+mtl_render_pipeline_descriptor_set_vertex_shader(mtl_render_pipeline_descriptor *descriptor,
+ mtl_function *shader)
+{
+ @autoreleasepool {
+ MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor;
+ desc.vertexFunction = (id<MTLFunction>)shader;
+ }
+}
+
+void
+mtl_render_pipeline_descriptor_set_fragment_shader(mtl_render_pipeline_descriptor *descriptor,
+ mtl_function *shader)
+{
+ @autoreleasepool {
+ MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor;
+ desc.fragmentFunction = (id<MTLFunction>)shader;
+ }
+}
+
+void
+mtl_render_pipeline_descriptor_set_input_primitive_topology(mtl_render_pipeline_descriptor *descriptor,
+ enum mtl_primitive_topology_class class)
+{
+ @autoreleasepool {
+ MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor;
+ desc.inputPrimitiveTopology = (MTLPrimitiveTopologyClass)class;
+ }
+}
+
+void
+mtl_render_pipeline_descriptor_set_color_attachment_format(mtl_render_pipeline_descriptor *descriptor,
+ uint8_t index,
+ enum mtl_pixel_format format)
+{
+ @autoreleasepool {
+ MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor;
+ desc.colorAttachments[index].pixelFormat = (MTLPixelFormat)format;
+ }
+}
+
+void
+mtl_render_pipeline_descriptor_set_depth_attachment_format(mtl_render_pipeline_descriptor *descriptor,
+ enum mtl_pixel_format format)
+{
+ @autoreleasepool {
+ MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor;
+ desc.depthAttachmentPixelFormat = (MTLPixelFormat)format;
+ }
+}
+
+void
+mtl_render_pipeline_descriptor_set_stencil_attachment_format(mtl_render_pipeline_descriptor *descriptor,
+ enum mtl_pixel_format format)
+{
+ @autoreleasepool {
+ MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor;
+ desc.stencilAttachmentPixelFormat = (MTLPixelFormat)format;
+ }
+}
+
+void
+mtl_render_pipeline_descriptor_set_raster_sample_count(mtl_render_pipeline_descriptor *descriptor,
+ uint32_t sample_count)
+{
+ @autoreleasepool {
+ MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor;
+ desc.rasterSampleCount = sample_count;
+ }
+}
+
+void
+mtl_render_pipeline_descriptor_set_alpha_to_coverage(mtl_render_pipeline_descriptor *descriptor,
+ bool enabled)
+{
+ @autoreleasepool {
+ MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor;
+ desc.alphaToCoverageEnabled = enabled;
+ }
+}
+
+void
+mtl_render_pipeline_descriptor_set_alpha_to_one(mtl_render_pipeline_descriptor *descriptor,
+ bool enabled)
+{
+ @autoreleasepool {
+ MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor;
+ desc.alphaToOneEnabled = enabled;
+ }
+}
+
+void
+mtl_render_pipeline_descriptor_set_rasterization_enabled(mtl_render_pipeline_descriptor *descriptor,
+ bool enabled)
+{
+ @autoreleasepool {
+ MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor;
+ desc.rasterizationEnabled = enabled;
+ }
+}
+
+void
+mtl_render_pipeline_descriptor_set_max_vertex_amplification_count(mtl_render_pipeline_descriptor *descriptor,
+ uint32_t count)
+{
+ @autoreleasepool {
+ MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor;
+ desc.maxVertexAmplificationCount = count;
+ }
+}
+
+/* Render pipeline */
+mtl_render_pipeline_state *
+mtl_new_render_pipeline(mtl_device *device, mtl_render_pipeline_descriptor *descriptor)
+{
+ @autoreleasepool {
+ MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor;
+ id<MTLDevice> dev = (id<MTLDevice>)device;
+ NSError *error = nil;
+ mtl_render_pipeline_state *pipeline = [dev newRenderPipelineStateWithDescriptor:desc error:&error];
+ if (error != nil) {
+ fprintf(stderr, "Failed to create MTLLibrary: %s\n", [error.localizedDescription UTF8String]);
+ }
+
+ return pipeline;
+ }
+}
+
+/* Stencil descriptor */
+mtl_stencil_descriptor *
+mtl_new_stencil_descriptor(void)
+{
+   @autoreleasepool {
+      return [MTLStencilDescriptor new];
+ }
+}
+
+/* TODO_KOSMICKRISP Move this to map */
+static MTLStencilOperation
+map_vk_stencil_op_to_mtl_stencil_operation(VkStencilOp op)
+{
+ switch (op) {
+ case VK_STENCIL_OP_KEEP:
+ return MTLStencilOperationKeep;
+ case VK_STENCIL_OP_ZERO:
+ return MTLStencilOperationZero;
+ case VK_STENCIL_OP_REPLACE:
+ return MTLStencilOperationReplace;
+ case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
+ return MTLStencilOperationIncrementClamp;
+ case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
+ return MTLStencilOperationDecrementClamp;
+ case VK_STENCIL_OP_INVERT:
+ return MTLStencilOperationInvert;
+ case VK_STENCIL_OP_INCREMENT_AND_WRAP:
+ return MTLStencilOperationIncrementWrap;
+ case VK_STENCIL_OP_DECREMENT_AND_WRAP:
+ return MTLStencilOperationDecrementWrap;
+ default:
+ assert(false && "Unsupported VkStencilOp");
+ return MTLStencilOperationZero;
+   }
+}
+
+void
+mtl_stencil_descriptor_set_stencil_failure_operation(mtl_stencil_descriptor *descriptor, VkStencilOp op)
+{
+ @autoreleasepool {
+ MTLStencilDescriptor *desc = (MTLStencilDescriptor *)descriptor;
+ desc.stencilFailureOperation = map_vk_stencil_op_to_mtl_stencil_operation(op);
+ }
+}
+
+void
+mtl_stencil_descriptor_set_depth_failure_operation(mtl_stencil_descriptor *descriptor, VkStencilOp op)
+{
+ @autoreleasepool {
+ MTLStencilDescriptor *desc = (MTLStencilDescriptor *)descriptor;
+ desc.depthFailureOperation = map_vk_stencil_op_to_mtl_stencil_operation(op);
+ }
+}
+
+void
+mtl_stencil_descriptor_set_depth_stencil_pass_operation(mtl_stencil_descriptor *descriptor, VkStencilOp op)
+{
+ @autoreleasepool {
+ MTLStencilDescriptor *desc = (MTLStencilDescriptor *)descriptor;
+ desc.depthStencilPassOperation = map_vk_stencil_op_to_mtl_stencil_operation(op);
+ }
+}
+
+void
+mtl_stencil_descriptor_set_stencil_compare_function(mtl_stencil_descriptor *descriptor, VkCompareOp op)
+{
+ @autoreleasepool {
+ MTLStencilDescriptor *desc = (MTLStencilDescriptor *)descriptor;
+ desc.stencilCompareFunction = (MTLCompareFunction)vk_compare_op_to_mtl_compare_function(op);
+ }
+}
+
+void
+mtl_stencil_descriptor_set_read_mask(mtl_stencil_descriptor *descriptor, uint32_t mask)
+{
+ @autoreleasepool {
+ MTLStencilDescriptor *desc = (MTLStencilDescriptor *)descriptor;
+ desc.readMask = mask;
+ }
+}
+
+void
+mtl_stencil_descriptor_set_write_mask(mtl_stencil_descriptor *descriptor, uint32_t mask)
+{
+ @autoreleasepool {
+ MTLStencilDescriptor *desc = (MTLStencilDescriptor *)descriptor;
+ desc.writeMask = mask;
+ }
+}
+
+/* Depth stencil descriptor */
+mtl_depth_stencil_descriptor *
+mtl_new_depth_stencil_descriptor(void)
+{
+   @autoreleasepool {
+      return [MTLDepthStencilDescriptor new];
+ }
+}
+
+void
+mtl_depth_stencil_descriptor_set_depth_compare_function(mtl_depth_stencil_descriptor *descriptor, VkCompareOp op)
+{
+ @autoreleasepool {
+ MTLDepthStencilDescriptor *desc = (MTLDepthStencilDescriptor *)descriptor;
+ desc.depthCompareFunction = (MTLCompareFunction)vk_compare_op_to_mtl_compare_function(op);
+ }
+}
+
+void
+mtl_depth_stencil_descriptor_set_depth_write_enabled(mtl_depth_stencil_descriptor *descriptor, bool enable_write)
+{
+ @autoreleasepool {
+ MTLDepthStencilDescriptor *desc = (MTLDepthStencilDescriptor *)descriptor;
+ desc.depthWriteEnabled = enable_write;
+ }
+}
+
+void
+mtl_depth_stencil_descriptor_set_back_face_stencil(mtl_depth_stencil_descriptor *descriptor, mtl_stencil_descriptor *stencil_descriptor)
+{
+ @autoreleasepool {
+ MTLDepthStencilDescriptor *desc = (MTLDepthStencilDescriptor *)descriptor;
+ desc.backFaceStencil = (MTLStencilDescriptor *)stencil_descriptor;
+ }
+}
+
+void
+mtl_depth_stencil_descriptor_set_front_face_stencil(mtl_depth_stencil_descriptor *descriptor, mtl_stencil_descriptor *stencil_descriptor)
+{
+ @autoreleasepool {
+ MTLDepthStencilDescriptor *desc = (MTLDepthStencilDescriptor *)descriptor;
+ desc.frontFaceStencil = (MTLStencilDescriptor *)stencil_descriptor;
+ }
+}
+
+mtl_depth_stencil_state *
+mtl_new_depth_stencil_state(mtl_device *device, mtl_depth_stencil_descriptor *descriptor)
+{
+ @autoreleasepool {
+ id<MTLDevice> dev = (id<MTLDevice>)device;
+ MTLDepthStencilDescriptor *desc = (MTLDepthStencilDescriptor *)descriptor;
+ return [dev newDepthStencilStateWithDescriptor:desc];
+ }
+}
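
Putting the pipeline setters together, driver-side pipeline creation looks roughly like this sketch. Illustrative only: the shader handles are assumed to come from mtl_new_function_with_name(), the BGRA8 format and sample count are arbitrary, and mtl_release() is the generic release helper from mtl_bridge.h.

   #include "mtl_render_state.h"
   #include "mtl_format.h"
   #include "mtl_bridge.h"

   /* Hypothetical helper: one BGRA8 color attachment, triangle topology, no MSAA. */
   static mtl_render_pipeline_state *
   example_build_pipeline(mtl_device *dev, mtl_function *vs, mtl_function *fs)
   {
      mtl_render_pipeline_descriptor *desc = mtl_new_render_pipeline_descriptor();
      mtl_render_pipeline_descriptor_set_vertex_shader(desc, vs);
      mtl_render_pipeline_descriptor_set_fragment_shader(desc, fs);
      mtl_render_pipeline_descriptor_set_input_primitive_topology(
         desc, MTL_PRIMITIVE_TOPOLOGY_CLASS_TRIANGLE);
      mtl_render_pipeline_descriptor_set_color_attachment_format(
         desc, 0, MTL_PIXEL_FORMAT_B8G8R8A8_UNORM);
      mtl_render_pipeline_descriptor_set_raster_sample_count(desc, 1);

      mtl_render_pipeline_state *pso = mtl_new_render_pipeline(dev, desc);
      mtl_release(desc); /* the descriptor is not needed once the PSO exists */
      return pso;
   }
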
diff --git a/src/kosmickrisp/bridge/mtl_sampler.h b/src/kosmickrisp/bridge/mtl_sampler.h
new file mode 100644
index 00000000000..ab751195b2c
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_sampler.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef MTL_SAMPLER_H
+#define MTL_SAMPLER_H 1
+
+#include "mtl_types.h"
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+/* Sampler descriptor */
+mtl_sampler_descriptor *mtl_new_sampler_descriptor(void);
+
+/* Sampler descriptor utils */
+void mtl_sampler_descriptor_set_normalized_coordinates(
+ mtl_sampler_descriptor *descriptor, bool normalized_coordinates);
+void mtl_sampler_descriptor_set_address_mode(
+ mtl_sampler_descriptor *descriptor,
+ enum mtl_sampler_address_mode address_mode_u,
+ enum mtl_sampler_address_mode address_mode_v,
+ enum mtl_sampler_address_mode address_mode_w);
+void
+mtl_sampler_descriptor_set_border_color(mtl_sampler_descriptor *descriptor,
+ enum mtl_sampler_border_color color);
+void
+mtl_sampler_descriptor_set_filters(mtl_sampler_descriptor *descriptor,
+ enum mtl_sampler_min_mag_filter min_filter,
+ enum mtl_sampler_min_mag_filter mag_filter,
+ enum mtl_sampler_mip_filter mip_filter);
+void mtl_sampler_descriptor_set_lod_clamp(mtl_sampler_descriptor *descriptor,
+ float min, float max);
+void
+mtl_sampler_descriptor_set_max_anisotropy(mtl_sampler_descriptor *descriptor,
+ uint64_t max);
+void
+mtl_sampler_descriptor_set_compare_function(mtl_sampler_descriptor *descriptor,
+ enum mtl_compare_function function);
+
+/* Sampler */
+mtl_sampler *mtl_new_sampler(mtl_device *device,
+ mtl_sampler_descriptor *descriptor);
+
+/* Sampler utils */
+uint64_t mtl_sampler_get_gpu_resource_id(mtl_sampler *sampler);
+
+#endif /* MTL_SAMPLER_H */ \ No newline at end of file
diff --git a/src/kosmickrisp/bridge/mtl_sampler.m b/src/kosmickrisp/bridge/mtl_sampler.m
new file mode 100644
index 00000000000..f7ddb66b7e1
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_sampler.m
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_sampler.h"
+
+#include <Metal/MTLSampler.h>
+
+mtl_sampler_descriptor *
+mtl_new_sampler_descriptor(void)
+{
+ @autoreleasepool {
+ MTLSamplerDescriptor *descriptor = [MTLSamplerDescriptor new];
+ /* Set common variables we don't expose */
+ descriptor.lodAverage = false;
+ descriptor.supportArgumentBuffers = true;
+ return descriptor;
+ }
+}
+
+void
+mtl_sampler_descriptor_set_normalized_coordinates(mtl_sampler_descriptor *descriptor, bool normalized_coordinates)
+{
+ @autoreleasepool {
+ MTLSamplerDescriptor *desc = (MTLSamplerDescriptor *)descriptor;
+ desc.normalizedCoordinates = normalized_coordinates;
+ }
+}
+
+void
+mtl_sampler_descriptor_set_address_mode(mtl_sampler_descriptor *descriptor,
+ enum mtl_sampler_address_mode address_mode_u,
+ enum mtl_sampler_address_mode address_mode_v,
+ enum mtl_sampler_address_mode address_mode_w)
+{
+ @autoreleasepool {
+ MTLSamplerDescriptor *desc = (MTLSamplerDescriptor *)descriptor;
+ desc.sAddressMode = (MTLSamplerAddressMode)address_mode_u;
+ desc.tAddressMode = (MTLSamplerAddressMode)address_mode_v;
+ desc.rAddressMode = (MTLSamplerAddressMode)address_mode_w;
+ }
+}
+
+void
+mtl_sampler_descriptor_set_border_color(mtl_sampler_descriptor *descriptor, enum mtl_sampler_border_color color)
+{
+ @autoreleasepool {
+ MTLSamplerDescriptor *desc = (MTLSamplerDescriptor *)descriptor;
+ desc.borderColor = (MTLSamplerBorderColor)color;
+ }
+}
+
+void
+mtl_sampler_descriptor_set_filters(mtl_sampler_descriptor *descriptor,
+ enum mtl_sampler_min_mag_filter min_filter,
+ enum mtl_sampler_min_mag_filter mag_filter,
+ enum mtl_sampler_mip_filter mip_filter)
+{
+ @autoreleasepool {
+ MTLSamplerDescriptor *desc = (MTLSamplerDescriptor *)descriptor;
+ desc.minFilter = (MTLSamplerMinMagFilter)min_filter;
+ desc.magFilter = (MTLSamplerMinMagFilter)mag_filter;
+ desc.mipFilter = (MTLSamplerMipFilter)mip_filter;
+ }
+}
+
+void
+mtl_sampler_descriptor_set_lod_clamp(mtl_sampler_descriptor *descriptor,
+ float min,
+ float max)
+{
+ @autoreleasepool {
+ MTLSamplerDescriptor *desc = (MTLSamplerDescriptor *)descriptor;
+ desc.lodMinClamp = min;
+ desc.lodMaxClamp = max;
+ }
+}
+
+void
+mtl_sampler_descriptor_set_max_anisotropy(mtl_sampler_descriptor *descriptor,
+ uint64_t max)
+{
+ @autoreleasepool {
+ MTLSamplerDescriptor *desc = (MTLSamplerDescriptor *)descriptor;
+ desc.maxAnisotropy = max ? max : 1u; /* Metal requires a non-zero value */
+ }
+}
+
+void
+mtl_sampler_descriptor_set_compare_function(mtl_sampler_descriptor *descriptor,
+ enum mtl_compare_function function)
+{
+ @autoreleasepool {
+ MTLSamplerDescriptor *desc = (MTLSamplerDescriptor *)descriptor;
+ desc.compareFunction = (MTLCompareFunction)function;
+ }
+}
+
+mtl_sampler *
+mtl_new_sampler(mtl_device *device, mtl_sampler_descriptor *descriptor)
+{
+ @autoreleasepool {
+ id<MTLDevice> dev = (id<MTLDevice>)device;
+ MTLSamplerDescriptor *desc = (MTLSamplerDescriptor *)descriptor;
+ return [dev newSamplerStateWithDescriptor:desc];
+ }
+}
+
+uint64_t
+mtl_sampler_get_gpu_resource_id(mtl_sampler *sampler)
+{
+ @autoreleasepool {
+ id<MTLSamplerState> samp = (id<MTLSamplerState>)sampler;
+ return [samp gpuResourceID]._impl;
+ }
+}
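
A typical clamp-to-edge, nearest-filter sampler goes through this surface as in the sketch below. Illustrative only: the filter and addressing choices are arbitrary, and the gpuResourceID handle is what the driver is expected to write into descriptor memory.

   #include "mtl_sampler.h"

   /* Hypothetical helper: nearest filtering, clamp-to-edge on all axes, no mips. */
   static mtl_sampler *
   example_make_nearest_sampler(mtl_device *dev)
   {
      mtl_sampler_descriptor *desc = mtl_new_sampler_descriptor();
      mtl_sampler_descriptor_set_normalized_coordinates(desc, true);
      mtl_sampler_descriptor_set_address_mode(desc,
                                              MTL_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
                                              MTL_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
                                              MTL_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE);
      mtl_sampler_descriptor_set_filters(desc, MTL_SAMPLER_MIN_MAG_FILTER_NEAREST,
                                         MTL_SAMPLER_MIN_MAG_FILTER_NEAREST,
                                         MTL_SAMPLER_MIP_FILTER_NOT_MIP_MAPPED);
      mtl_sampler_descriptor_set_max_anisotropy(desc, 1);

      mtl_sampler *sampler = mtl_new_sampler(dev, desc);
      uint64_t gpu_id = mtl_sampler_get_gpu_resource_id(sampler);
      (void)gpu_id; /* written into argument-buffer / descriptor memory by the driver */
      return sampler;
   }
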
diff --git a/src/kosmickrisp/bridge/mtl_sync.h b/src/kosmickrisp/bridge/mtl_sync.h
new file mode 100644
index 00000000000..8df0057cd75
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_sync.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef MTL_SYNC_H
+#define MTL_SYNC_H 1
+
+#include "mtl_types.h"
+
+#include <inttypes.h>
+
+/* MTLFence */
+mtl_fence *mtl_new_fence(mtl_device *device);
+
+/* MTLEvent */
+mtl_event *mtl_new_event(mtl_device *device);
+
+/* MTLSharedEvent */
+mtl_shared_event *mtl_new_shared_event(mtl_device *device);
+int mtl_shared_event_wait_until_signaled_value(mtl_shared_event *event_handle,
+ uint64_t value,
+ uint64_t timeout_ms);
+uint64_t mtl_shared_event_get_signaled_value(mtl_shared_event *event_handle);
+void mtl_shared_event_set_signaled_value(mtl_shared_event *event_handle,
+ uint64_t value);
+
+#endif /* MTL_SYNC_H */ \ No newline at end of file
diff --git a/src/kosmickrisp/bridge/mtl_sync.m b/src/kosmickrisp/bridge/mtl_sync.m
new file mode 100644
index 00000000000..d7f5b211318
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_sync.m
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_sync.h"
+
+#include <Metal/MTLEvent.h>
+
+/* MTLFence */
+mtl_fence *
+mtl_new_fence(mtl_device *device)
+{
+ @autoreleasepool {
+ id<MTLDevice> dev = (id<MTLDevice>)device;
+ return (mtl_fence *)[dev newFence];
+ }
+}
+
+/* MTLEvent */
+mtl_event *
+mtl_new_event(mtl_device *device)
+{
+ @autoreleasepool {
+ id<MTLDevice> dev = (id<MTLDevice>)device;
+ return [dev newEvent];
+ }
+}
+
+/* MTLSharedEvent */
+mtl_shared_event *
+mtl_new_shared_event(mtl_device *device)
+{
+ @autoreleasepool {
+ id<MTLDevice> dev = (id<MTLDevice>)device;
+ return [dev newSharedEvent];
+ }
+}
+
+int
+mtl_shared_event_wait_until_signaled_value(mtl_shared_event *event_handle, uint64_t value, uint64_t timeout_ms)
+{
+ @autoreleasepool {
+ id<MTLSharedEvent> event = (id<MTLSharedEvent>)event_handle;
+ return (int)[event waitUntilSignaledValue:value timeoutMS:timeout_ms];
+ }
+}
+
+void
+mtl_shared_event_set_signaled_value(mtl_shared_event *event_handle, uint64_t value)
+{
+ @autoreleasepool {
+ id<MTLSharedEvent> event = (id<MTLSharedEvent>)event_handle;
+ event.signaledValue = value;
+ }
+}
+
+uint64_t
+mtl_shared_event_get_signaled_value(mtl_shared_event *event_handle)
+{
+ @autoreleasepool {
+ id<MTLSharedEvent> event = (id<MTLSharedEvent>)event_handle;
+ return event.signaledValue;
+ }
+}
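
The MTLSharedEvent wrappers are what host-visible timeline waits build on; the host-side wait and signal paths look roughly like the sketch below (illustrative only; the non-zero-on-success convention mirrors the BOOL returned by waitUntilSignaledValue:timeoutMS:).

   #include "mtl_sync.h"

   #include <stdbool.h>

   /* Hypothetical helpers: host-side wait and signal on a timeline value. */
   static bool
   example_host_wait(mtl_shared_event *event, uint64_t value, uint64_t timeout_ms)
   {
      /* Non-zero on success, 0 if the timeout expired first. */
      return mtl_shared_event_wait_until_signaled_value(event, value, timeout_ms) != 0;
   }

   static void
   example_host_signal(mtl_shared_event *event, uint64_t value)
   {
      /* Timeline values only move forward. */
      if (mtl_shared_event_get_signaled_value(event) < value)
         mtl_shared_event_set_signaled_value(event, value);
   }
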
diff --git a/src/kosmickrisp/bridge/mtl_texture.h b/src/kosmickrisp/bridge/mtl_texture.h
new file mode 100644
index 00000000000..ab1de341b01
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_texture.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef MTL_TEXTURE_H
+#define MTL_TEXTURE_H 1
+
+#include "mtl_types.h"
+
+#include <inttypes.h>
+
+/* TODO_KOSMICKRISP Move this to bridge. */
+struct kk_view_layout;
+
+/* Utils */
+uint64_t mtl_texture_get_gpu_resource_id(mtl_texture *texture);
+
+/* Texture view creation */
+mtl_texture *mtl_new_texture_view_with(mtl_texture *texture,
+ const struct kk_view_layout *layout);
+mtl_texture *
+mtl_new_texture_view_with_no_swizzle(mtl_texture *texture,
+ const struct kk_view_layout *layout);
+
+#endif /* MTL_TEXTURE_H */
diff --git a/src/kosmickrisp/bridge/mtl_texture.m b/src/kosmickrisp/bridge/mtl_texture.m
new file mode 100644
index 00000000000..5042e7f2926
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_texture.m
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_texture.h"
+
+/* TODO_LUNARG Remove */
+#include "kk_image_layout.h"
+
+/* TODO_LUNARG Remove */
+#include "vulkan/vulkan.h"
+
+#include <Metal/MTLTexture.h>
+
+uint64_t
+mtl_texture_get_gpu_resource_id(mtl_texture *texture)
+{
+ @autoreleasepool {
+ id<MTLTexture> tex = (id<MTLTexture>)texture;
+ return (uint64_t)[tex gpuResourceID]._impl;
+ }
+}
+
+/* TODO_KOSMICKRISP This should be part of the mapping */
+static uint32_t
+mtl_texture_view_type(uint32_t type, uint8_t sample_count)
+{
+ switch (type) {
+ case VK_IMAGE_VIEW_TYPE_1D:
+ return MTLTextureType1D;
+ case VK_IMAGE_VIEW_TYPE_1D_ARRAY:
+ return MTLTextureType1DArray;
+ case VK_IMAGE_VIEW_TYPE_2D:
+      return sample_count > 1u ? MTLTextureType2DMultisample : MTLTextureType2D;
+ case VK_IMAGE_VIEW_TYPE_CUBE:
+ return MTLTextureTypeCube;
+ case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY:
+ return MTLTextureTypeCubeArray;
+ case VK_IMAGE_VIEW_TYPE_2D_ARRAY:
+ return sample_count > 1u ? MTLTextureType2DMultisampleArray : MTLTextureType2DArray;
+ case VK_IMAGE_VIEW_TYPE_3D:
+ return MTLTextureType3D;
+ default:
+ assert(false && "Unsupported VkViewType");
+ return MTLTextureType1D;
+ }
+}
+
+static MTLTextureSwizzle
+mtl_texture_swizzle(enum pipe_swizzle swizzle)
+{
+ const MTLTextureSwizzle map[] =
+ {
+ [PIPE_SWIZZLE_X] = MTLTextureSwizzleRed,
+ [PIPE_SWIZZLE_Y] = MTLTextureSwizzleGreen,
+ [PIPE_SWIZZLE_Z] = MTLTextureSwizzleBlue,
+ [PIPE_SWIZZLE_W] = MTLTextureSwizzleAlpha,
+ [PIPE_SWIZZLE_0] = MTLTextureSwizzleZero,
+ [PIPE_SWIZZLE_1] = MTLTextureSwizzleOne,
+ };
+
+ return map[swizzle];
+}
+
+mtl_texture *
+mtl_new_texture_view_with(mtl_texture *texture, const struct kk_view_layout *layout)
+{
+ @autoreleasepool {
+ id<MTLTexture> tex = (id<MTLTexture>)texture;
+ MTLTextureType type = mtl_texture_view_type(layout->view_type, layout->sample_count_sa);
+ NSRange levels = NSMakeRange(layout->base_level, layout->num_levels);
+ NSRange slices = NSMakeRange(layout->base_array_layer, layout->array_len);
+ MTLTextureSwizzleChannels swizzle = MTLTextureSwizzleChannelsMake(mtl_texture_swizzle(layout->swizzle.red),
+ mtl_texture_swizzle(layout->swizzle.green),
+ mtl_texture_swizzle(layout->swizzle.blue),
+ mtl_texture_swizzle(layout->swizzle.alpha));
+ return [tex newTextureViewWithPixelFormat:layout->format.mtl textureType:type levels:levels slices:slices swizzle:swizzle];
+ }
+}
+
+mtl_texture *
+mtl_new_texture_view_with_no_swizzle(mtl_texture *texture, const struct kk_view_layout *layout)
+{
+ @autoreleasepool {
+ id<MTLTexture> tex = (id<MTLTexture>)texture;
+ MTLTextureType type = mtl_texture_view_type(layout->view_type, layout->sample_count_sa);
+ NSRange levels = NSMakeRange(layout->base_level, layout->num_levels);
+ NSRange slices = NSMakeRange(layout->base_array_layer, layout->array_len);
+ return [tex newTextureViewWithPixelFormat:layout->format.mtl textureType:type levels:levels slices:slices];
+ }
+}
+
diff --git a/src/kosmickrisp/bridge/mtl_types.h b/src/kosmickrisp/bridge/mtl_types.h
new file mode 100644
index 00000000000..90bfbdb8315
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_types.h
@@ -0,0 +1,272 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ *
+ * Structures and enums found in this file are a 1-1 mapping of Metal's
+ * equivalents
+ */
+
+#ifndef KK_MTL_TYPES_H
+#define KK_MTL_TYPES_H 1
+
+#include <stddef.h> /* For size_t definition */
+
+/** HANDLES */
+typedef void mtl_device;
+typedef void mtl_heap;
+typedef void mtl_buffer;
+typedef void mtl_texture;
+typedef void mtl_command_queue;
+typedef void mtl_command_buffer;
+typedef void mtl_command_encoder;
+typedef void mtl_blit_encoder;
+typedef void mtl_compute_encoder;
+typedef void mtl_render_encoder;
+typedef void mtl_event;
+typedef void mtl_shared_event;
+typedef void mtl_sampler_descriptor;
+typedef void mtl_sampler;
+typedef void mtl_compute_pipeline_state;
+typedef void mtl_library;
+typedef void mtl_render_pipeline_state;
+typedef void mtl_function;
+typedef void mtl_resource;
+typedef void mtl_render_pass_descriptor;
+typedef void mtl_render_pipeline_descriptor;
+typedef void mtl_fence;
+typedef void mtl_stencil_descriptor;
+typedef void mtl_depth_stencil_descriptor;
+typedef void mtl_depth_stencil_state;
+typedef void mtl_render_pass_attachment_descriptor;
+
+/** ENUMS */
+enum mtl_cpu_cache_mode {
+ MTL_CPU_CACHE_MODE_DEFAULT_CACHE = 0,
+ MTL_CPU_CACHE_MODE_WRITE_COMBINED = 1,
+};
+
+enum mtl_storage_mode {
+ MTL_STORAGE_MODE_SHARED = 0,
+ MTL_STORAGE_MODE_MANAGED = 1,
+ MTL_STORAGE_MODE_PRIVATE = 2,
+ MTL_STORAGE_MODE_MEMORYLESS = 3,
+};
+
+enum mtl_hazard_tracking_mode {
+   MTL_HAZARD_TRACKING_MODE_DEFAULT = 0,
+ MTL_HAZARD_TRACKING_MODE_UNTRACKED = 1,
+ MTL_HAZARD_TRACKING_MODE_TRACKED = 2,
+};
+
+#define MTL_RESOURCE_CPU_CACHE_MODE_SHIFT 0
+#define MTL_RESOURCE_STORAGE_MODE_SHIFT 4
+#define MTL_RESOURCE_HAZARD_TRACKING_MODE_SHIFT 8
+enum mtl_resource_options {
+ MTL_RESOURCE_CPU_CACHE_MODE_DEFAULT_CACHE =
+ MTL_CPU_CACHE_MODE_DEFAULT_CACHE << MTL_RESOURCE_CPU_CACHE_MODE_SHIFT,
+ MTL_RESOURCE_CPU_CACHE_MODE_WRITE_COMBINED =
+ MTL_CPU_CACHE_MODE_WRITE_COMBINED << MTL_RESOURCE_CPU_CACHE_MODE_SHIFT,
+ MTL_RESOURCE_STORAGE_MODE_SHARED = MTL_STORAGE_MODE_SHARED
+ << MTL_RESOURCE_STORAGE_MODE_SHIFT,
+ MTL_RESOURCE_STORAGE_MODE_PRIVATE = MTL_STORAGE_MODE_PRIVATE
+ << MTL_RESOURCE_STORAGE_MODE_SHIFT,
+ MTL_RESOURCE_TRACKING_MODE_DEFAULT =
+      MTL_HAZARD_TRACKING_MODE_DEFAULT
+ << MTL_RESOURCE_HAZARD_TRACKING_MODE_SHIFT,
+ MTL_RESOURCE_TRACKING_MODE_UNTRACKED =
+ MTL_HAZARD_TRACKING_MODE_UNTRACKED
+ << MTL_RESOURCE_HAZARD_TRACKING_MODE_SHIFT,
+ MTL_RESOURCE_TRACKING_MODE_TRACKED =
+ MTL_HAZARD_TRACKING_MODE_TRACKED
+ << MTL_RESOURCE_HAZARD_TRACKING_MODE_SHIFT,
+};
+
+enum mtl_blit_options {
+ MTL_BLIT_OPTION_NONE = 0,
+ MTL_BLIT_OPTION_DEPTH_FROM_DEPTH_STENCIL = 1 << 0,
+ MTL_BLIT_OPTION_STENCIL_FROM_DEPTH_STENCIL = 1 << 1,
+};
+
+enum mtl_resource_usage {
+ MTL_RESOURCE_USAGE_READ = 1 << 0,
+ MTL_RESOURCE_USAGE_WRITE = 1 << 1,
+};
+
+enum mtl_primitive_type {
+ MTL_PRIMITIVE_TYPE_POINT = 0,
+ MTL_PRIMITIVE_TYPE_LINE = 1,
+ MTL_PRIMITIVE_TYPE_LINE_STRIP = 2,
+ MTL_PRIMITIVE_TYPE_TRIANGLE = 3,
+ MTL_PRIMITIVE_TYPE_TRIANGLE_STRIP = 4,
+};
+
+enum mtl_primitive_topology_class {
+ MTL_PRIMITIVE_TOPOLOGY_CLASS_UNSPECIFIED = 0,
+ MTL_PRIMITIVE_TOPOLOGY_CLASS_POINT = 1,
+ MTL_PRIMITIVE_TOPOLOGY_CLASS_LINE = 2,
+ MTL_PRIMITIVE_TOPOLOGY_CLASS_TRIANGLE = 3,
+};
+
+enum mtl_texture_type {
+ MTL_TEXTURE_TYPE_1D = 0u,
+ MTL_TEXTURE_TYPE_1D_ARRAY = 1u,
+ MTL_TEXTURE_TYPE_2D = 2u,
+ MTL_TEXTURE_TYPE_2D_ARRAY = 3u,
+ MTL_TEXTURE_TYPE_2D_MULTISAMPLE = 4u,
+ MTL_TEXTURE_TYPE_CUBE = 5u,
+ MTL_TEXTURE_TYPE_CUBE_ARRAY = 6u,
+ MTL_TEXTURE_TYPE_3D = 7u,
+ MTL_TEXTURE_TYPE_2D_ARRAY_MULTISAMPLE = 8u,
+ MTL_TEXTURE_TYPE_TEXTURE_BUFFER = 9u,
+};
+
+enum mtl_texture_usage {
+ MTL_TEXTURE_USAGE_UNKNOWN = 0x0000,
+ MTL_TEXTURE_USAGE_SHADER_READ = 0x0001,
+   MTL_TEXTURE_USAGE_SHADER_WRITE = 0x0002,
+   MTL_TEXTURE_USAGE_RENDER_TARGET = 0x0004,
+   MTL_TEXTURE_USAGE_PIXEL_FORMAT_VIEW = 0x0010,
+   MTL_TEXTURE_USAGE_SHADER_ATOMIC = 0x0020,
+};
+
+enum mtl_load_action {
+ MTL_LOAD_ACTION_DONT_CARE = 0u,
+ MTL_LOAD_ACTION_LOAD = 1u,
+ MTL_LOAD_ACTION_CLEAR = 2u,
+};
+
+enum mtl_store_action {
+ MTL_STORE_ACTION_DONT_CARE = 0u,
+ MTL_STORE_ACTION_STORE = 1u,
+ MTL_STORE_ACTION_MULTISAMPLE_RESOLVE = 2u,
+ MTL_STORE_ACTION_STORE_AND_MULTISAMPLE_RESOLVE = 3u,
+ MTL_STORE_ACTION_UNKNOWN = 4u,
+ MTL_STORE_ACTION_CUSTOM_SAMPLE_DEPTH_STORE = 5u,
+};
+
+enum mtl_texture_swizzle {
+ MTL_TEXTURE_SWIZZLE_ZERO = 0,
+ MTL_TEXTURE_SWIZZLE_ONE = 1,
+ MTL_TEXTURE_SWIZZLE_RED = 2,
+ MTL_TEXTURE_SWIZZLE_GREEN = 3,
+ MTL_TEXTURE_SWIZZLE_BLUE = 4,
+ MTL_TEXTURE_SWIZZLE_ALPHA = 5,
+};
+
+enum mtl_index_type {
+ MTL_INDEX_TYPE_UINT16 = 0,
+ MTL_INDEX_TYPE_UINT32 = 1,
+};
+
+enum mtl_sampler_address_mode {
+ MTL_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE = 0,
+ MTL_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE = 1,
+ MTL_SAMPLER_ADDRESS_MODE_REPEAT = 2,
+ MTL_SAMPLER_ADDRESS_MODE_MIRROR_REPEAT = 3,
+ MTL_SAMPLER_ADDRESS_MODE_CLAMP_TO_ZERO = 4,
+ MTL_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER_COLOR = 5,
+};
+
+enum mtl_sampler_border_color {
+ MTL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK = 0,
+ MTL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK = 1,
+ MTL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE = 2,
+};
+
+enum mtl_sampler_min_mag_filter {
+ MTL_SAMPLER_MIN_MAG_FILTER_NEAREST = 0,
+ MTL_SAMPLER_MIN_MAG_FILTER_LINEAR = 1,
+};
+
+enum mtl_sampler_mip_filter {
+ MTL_SAMPLER_MIP_FILTER_NOT_MIP_MAPPED = 0,
+ MTL_SAMPLER_MIP_FILTER_NEAREST = 1,
+ MTL_SAMPLER_MIP_FILTER_LINEAR = 2,
+};
+
+enum mtl_compare_function {
+ MTL_COMPARE_FUNCTION_NEVER = 0,
+ MTL_COMPARE_FUNCTION_LESS = 1,
+ MTL_COMPARE_FUNCTION_EQUAL = 2,
+ MTL_COMPARE_FUNCTION_LESS_EQUAL = 3,
+ MTL_COMPARE_FUNCTION_GREATER = 4,
+ MTL_COMPARE_FUNCTION_NOT_EQUAL = 5,
+ MTL_COMPARE_FUNCTION_GREATER_EQUAL = 6,
+ MTL_COMPARE_FUNCTION_ALWAYS = 7,
+};
+
+enum mtl_winding {
+ MTL_WINDING_CLOCKWISE = 0,
+ MTL_WINDING_COUNTER_CLOCKWISE = 1,
+};
+
+enum mtl_cull_mode {
+ MTL_CULL_MODE_NONE = 0,
+ MTL_CULL_MODE_FRONT = 1,
+ MTL_CULL_MODE_BACK = 2,
+};
+
+enum mtl_visibility_result_mode {
+ MTL_VISIBILITY_RESULT_MODE_DISABLED = 0,
+ MTL_VISIBILITY_RESULT_MODE_BOOLEAN = 1,
+ MTL_VISIBILITY_RESULT_MODE_COUNTING = 2,
+};
+
+enum mtl_depth_clip_mode {
+ MTL_DEPTH_CLIP_MODE_CLIP = 0,
+ MTL_DEPTH_CLIP_MODE_CLAMP = 1,
+};
+
+/** STRUCTURES */
+struct mtl_range {
+ size_t offset;
+ size_t length;
+};
+
+struct mtl_origin {
+ size_t x, y, z;
+};
+
+struct mtl_size {
+ size_t x, y, z;
+};
+
+struct mtl_viewport {
+ double originX, originY, width, height, znear, zfar;
+};
+
+struct mtl_clear_color {
+ union {
+ struct {
+ double red, green, blue, alpha;
+ };
+ double channel[4];
+ };
+};
+
+struct mtl_scissor_rect {
+ size_t x, y, width, height;
+};
+
+struct mtl_texture_swizzle_channels {
+ enum mtl_texture_swizzle red;
+ enum mtl_texture_swizzle green;
+ enum mtl_texture_swizzle blue;
+ enum mtl_texture_swizzle alpha;
+};
+
+struct mtl_buffer_image_copy {
+ struct mtl_size image_size;
+ struct mtl_origin image_origin;
+ mtl_buffer *buffer;
+ mtl_texture *image;
+ size_t buffer_offset_B;
+ size_t buffer_stride_B;
+ size_t buffer_2d_image_size_B;
+ size_t image_slice;
+ size_t image_level;
+ enum mtl_blit_options options;
+};
+
+#endif /* KK_MTL_TYPES_H */
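
Since mtl_resource_options reproduces Metal's bit layout via the shift constants above, option words compose exactly like MTLResourceOptions. A small sketch of the packing (the helper name is a placeholder):

   #include "mtl_types.h"

   /* Hypothetical example: options for a CPU-visible heap that the driver
    * hazard-tracks itself. Cache mode occupies bits [3:0], storage mode [7:4],
    * hazard tracking [11:8]. */
   static inline enum mtl_resource_options
   example_shared_untracked_options(void)
   {
      return MTL_RESOURCE_CPU_CACHE_MODE_DEFAULT_CACHE | /* 0 << 0 */
             MTL_RESOURCE_STORAGE_MODE_SHARED |          /* 0 << 4 */
             MTL_RESOURCE_TRACKING_MODE_UNTRACKED;       /* 1 << 8 */
   }
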
diff --git a/src/kosmickrisp/bridge/stubs/mtl_bridge.c b/src/kosmickrisp/bridge/stubs/mtl_bridge.c
new file mode 100644
index 00000000000..10628fcf89c
--- /dev/null
+++ b/src/kosmickrisp/bridge/stubs/mtl_bridge.c
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_bridge.h"
+
+mtl_texture *
+mtl_drawable_get_texture(void *drawable_ptr)
+{
+ return NULL;
+}
+
+void *
+mtl_retain(void *handle)
+{
+ return NULL;
+}
+
+void
+mtl_release(void *handle)
+{
+}
diff --git a/src/kosmickrisp/bridge/stubs/mtl_buffer.c b/src/kosmickrisp/bridge/stubs/mtl_buffer.c
new file mode 100644
index 00000000000..ee98a5c88e2
--- /dev/null
+++ b/src/kosmickrisp/bridge/stubs/mtl_buffer.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_buffer.h"
+
+uint64_t
+mtl_buffer_get_length(mtl_buffer *buffer)
+{
+ return 0u;
+}
+
+uint64_t
+mtl_buffer_get_gpu_address(mtl_buffer *buffer)
+{
+ return 0u;
+}
+
+void *
+mtl_get_contents(mtl_buffer *buffer)
+{
+ return NULL;
+}
+
+mtl_texture *
+mtl_new_texture_with_descriptor_linear(mtl_buffer *buffer,
+ const struct kk_image_layout *layout,
+ uint64_t offset)
+{
+ return NULL;
+}
diff --git a/src/kosmickrisp/bridge/stubs/mtl_command_buffer.c b/src/kosmickrisp/bridge/stubs/mtl_command_buffer.c
new file mode 100644
index 00000000000..ff36e37aa9d
--- /dev/null
+++ b/src/kosmickrisp/bridge/stubs/mtl_command_buffer.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_command_buffer.h"
+
+void
+mtl_encode_signal_event(mtl_command_buffer *cmd_buf_handle,
+ mtl_event *event_handle, uint64_t value)
+{
+}
+
+void
+mtl_encode_wait_for_event(mtl_command_buffer *cmd_buf_handle,
+ mtl_event *event_handle, uint64_t value)
+{
+}
+
+void
+mtl_add_completed_handler(mtl_command_buffer *cmd, void (*callback)(void *data),
+ void *data)
+{
+}
+
+void
+mtl_command_buffer_commit(mtl_command_buffer *cmd_buf)
+{
+}
+
+void
+mtl_present_drawable(mtl_command_buffer *cmd_buf, void *drawable)
+{
+} \ No newline at end of file
diff --git a/src/kosmickrisp/bridge/stubs/mtl_command_queue.c b/src/kosmickrisp/bridge/stubs/mtl_command_queue.c
new file mode 100644
index 00000000000..aa39a3446ee
--- /dev/null
+++ b/src/kosmickrisp/bridge/stubs/mtl_command_queue.c
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_command_queue.h"
+
+mtl_command_queue *
+mtl_new_command_queue(mtl_device *device, uint32_t cmd_buffer_count)
+{
+ return NULL;
+}
+
+mtl_command_buffer *
+mtl_new_command_buffer(mtl_command_queue *cmd_queue)
+{
+ return NULL;
+}
diff --git a/src/kosmickrisp/bridge/stubs/mtl_compute_state.c b/src/kosmickrisp/bridge/stubs/mtl_compute_state.c
new file mode 100644
index 00000000000..f7a8a6aa26f
--- /dev/null
+++ b/src/kosmickrisp/bridge/stubs/mtl_compute_state.c
@@ -0,0 +1,14 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_compute_state.h"
+
+mtl_compute_pipeline_state *
+mtl_new_compute_pipeline_state(mtl_device *device, mtl_function *function,
+ uint64_t max_total_threads_per_threadgroup)
+{
+ return NULL;
+}
diff --git a/src/kosmickrisp/bridge/stubs/mtl_device.c b/src/kosmickrisp/bridge/stubs/mtl_device.c
new file mode 100644
index 00000000000..78e22e30c12
--- /dev/null
+++ b/src/kosmickrisp/bridge/stubs/mtl_device.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_device.h"
+
+/* Device creation */
+mtl_device *
+mtl_device_create(void)
+{
+ return NULL;
+}
+
+/* Device operations */
+void
+mtl_start_gpu_capture(mtl_device *mtl_dev_handle)
+{
+}
+
+void
+mtl_stop_gpu_capture(void)
+{
+}
+
+/* Device feature query */
+void
+mtl_device_get_name(mtl_device *dev, char buffer[256])
+{
+}
+
+void
+mtl_device_get_architecture_name(mtl_device *dev, char buffer[256])
+{
+}
+
+uint64_t
+mtl_device_get_peer_group_id(mtl_device *dev)
+{
+ return 0u;
+}
+
+uint32_t
+mtl_device_get_peer_index(mtl_device *dev)
+{
+ return 0u;
+}
+
+uint64_t
+mtl_device_get_registry_id(mtl_device *dev)
+{
+ return 0u;
+}
+
+struct mtl_size
+mtl_device_max_threads_per_threadgroup(mtl_device *dev)
+{
+   return (struct mtl_size){0};
+}
+
+/* Resource queries */
+void
+mtl_heap_buffer_size_and_align_with_length(mtl_device *device, uint64_t *size_B,
+ uint64_t *align_B)
+{
+}
+
+void
+mtl_heap_texture_size_and_align_with_descriptor(mtl_device *device,
+ struct kk_image_layout *layout)
+{
+}
diff --git a/src/kosmickrisp/bridge/stubs/mtl_encoder.c b/src/kosmickrisp/bridge/stubs/mtl_encoder.c
new file mode 100644
index 00000000000..672e628c1dc
--- /dev/null
+++ b/src/kosmickrisp/bridge/stubs/mtl_encoder.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_encoder.h"
+
+/* Common encoder utils */
+void
+mtl_end_encoding(void *encoder)
+{
+}
+
+/* MTLBlitEncoder */
+mtl_blit_encoder *
+mtl_new_blit_command_encoder(mtl_command_buffer *cmd_buffer)
+{
+ return NULL;
+}
+
+void
+mtl_blit_update_fence(mtl_blit_encoder *encoder, mtl_fence *fence)
+{
+}
+
+void
+mtl_blit_wait_for_fence(mtl_blit_encoder *encoder, mtl_fence *fence)
+{
+}
+
+void
+mtl_copy_from_buffer_to_buffer(mtl_blit_encoder *blit_enc_handle,
+ mtl_buffer *src_buf, size_t src_offset,
+ mtl_buffer *dst_buf, size_t dst_offset,
+ size_t size)
+{
+}
+
+void
+mtl_copy_from_buffer_to_texture(mtl_blit_encoder *blit_enc_handle,
+ struct mtl_buffer_image_copy *data)
+{
+}
+
+void
+mtl_copy_from_texture_to_buffer(mtl_blit_encoder *blit_enc_handle,
+ struct mtl_buffer_image_copy *data)
+{
+}
+
+void
+mtl_copy_from_texture_to_texture(mtl_blit_encoder *blit_enc_handle,
+ mtl_texture *src_tex_handle, size_t src_slice,
+ size_t src_level, struct mtl_origin src_origin,
+ struct mtl_size src_size,
+ mtl_texture *dst_tex_handle, size_t dst_slice,
+ size_t dst_level, struct mtl_origin dst_origin)
+{
+}
+
+/* MTLComputeEncoder */
+mtl_compute_encoder *
+mtl_new_compute_command_encoder(mtl_command_buffer *cmd_buffer)
+{
+ return NULL;
+}
+
+void
+mtl_compute_update_fence(mtl_compute_encoder *encoder, mtl_fence *fence)
+{
+}
+
+void
+mtl_compute_wait_for_fence(mtl_compute_encoder *encoder, mtl_fence *fence)
+{
+}
+
+void
+mtl_compute_set_pipeline_state(mtl_compute_encoder *encoder,
+ mtl_compute_pipeline_state *state_handle)
+{
+}
+
+void
+mtl_compute_set_buffer(mtl_compute_encoder *encoder, mtl_buffer *buffer,
+ size_t offset, size_t index)
+{
+}
+
+void
+mtl_compute_use_resource(mtl_compute_encoder *encoder, mtl_resource *res_handle,
+ uint32_t usage)
+{
+}
+
+void
+mtl_compute_use_resources(mtl_compute_encoder *encoder,
+ mtl_resource **resource_handles, uint32_t count,
+ enum mtl_resource_usage usage)
+{
+}
+
+void
+mtl_compute_use_heaps(mtl_compute_encoder *encoder, mtl_heap **heaps,
+ uint32_t count)
+{
+}
+
+void
+mtl_dispatch_threads(mtl_compute_encoder *encoder, struct mtl_size grid_size,
+ struct mtl_size local_size)
+{
+}
+
+void
+mtl_dispatch_threadgroups_with_indirect_buffer(mtl_compute_encoder *encoder,
+ mtl_buffer *buffer,
+ uint32_t offset,
+ struct mtl_size local_size)
+{
+}
+
+/* MTLRenderEncoder */
+mtl_render_encoder *
+mtl_new_render_command_encoder_with_descriptor(
+ mtl_command_buffer *command_buffer, mtl_render_pass_descriptor *descriptor)
+{
+ return NULL;
+}
+
+void
+mtl_render_update_fence(mtl_render_encoder *encoder, mtl_fence *fence)
+{
+}
+
+void
+mtl_render_wait_for_fence(mtl_render_encoder *encoder, mtl_fence *fence)
+{
+}
+
+void
+mtl_set_viewports(mtl_render_encoder *encoder, struct mtl_viewport *viewports,
+ uint32_t count)
+{
+}
+
+void
+mtl_set_scissor_rects(mtl_render_encoder *encoder,
+ struct mtl_scissor_rect *scissor_rects, uint32_t count)
+{
+}
+
+void
+mtl_render_set_pipeline_state(mtl_render_encoder *encoder,
+ mtl_render_pipeline_state *pipeline)
+{
+}
+
+void
+mtl_set_depth_stencil_state(mtl_render_encoder *encoder,
+ mtl_depth_stencil_state *state)
+{
+}
+
+void
+mtl_set_stencil_references(mtl_render_encoder *encoder, uint32_t front,
+ uint32_t back)
+{
+}
+
+void
+mtl_set_front_face_winding(mtl_render_encoder *encoder,
+ enum mtl_winding winding)
+{
+}
+
+void
+mtl_set_cull_mode(mtl_render_encoder *encoder, enum mtl_cull_mode mode)
+{
+}
+
+void
+mtl_set_visibility_result_mode(mtl_render_encoder *encoder,
+ enum mtl_visibility_result_mode mode,
+ size_t offset)
+{
+}
+
+void
+mtl_set_depth_bias(mtl_render_encoder *encoder, float depth_bias,
+ float slope_scale, float clamp)
+{
+}
+
+void
+mtl_set_depth_clip_mode(mtl_render_encoder *encoder,
+ enum mtl_depth_clip_mode mode)
+{
+}
+
+void
+mtl_set_vertex_amplification_count(mtl_render_encoder *encoder,
+ uint32_t *layer_ids, uint32_t id_count)
+{
+}
+
+void
+mtl_set_vertex_buffer(mtl_render_encoder *encoder, mtl_buffer *buffer,
+ uint32_t offset, uint32_t index)
+{
+}
+
+void
+mtl_set_fragment_buffer(mtl_render_encoder *encoder, mtl_buffer *buffer,
+ uint32_t offset, uint32_t index)
+{
+}
+
+void
+mtl_draw_primitives(mtl_render_encoder *encoder,
+                    enum mtl_primitive_type primitive_type, uint32_t vertexStart,
+ uint32_t vertexCount, uint32_t instanceCount,
+ uint32_t baseInstance)
+{
+}
+
+void
+mtl_draw_indexed_primitives(
+   mtl_render_encoder *encoder, enum mtl_primitive_type primitive_type,
+ uint32_t index_count, enum mtl_index_type index_type,
+ mtl_buffer *index_buffer, uint32_t index_buffer_offset,
+ uint32_t instance_count, int32_t base_vertex, uint32_t base_instance)
+{
+}
+
+void
+mtl_draw_primitives_indirect(mtl_render_encoder *encoder,
+ enum mtl_primitive_type primitive_type,
+ mtl_buffer *indirect_buffer,
+ uint64_t indirect_buffer_offset)
+{
+}
+
+void
+mtl_draw_indexed_primitives_indirect(mtl_render_encoder *encoder,
+ enum mtl_primitive_type primitive_type,
+ enum mtl_index_type index_type,
+ mtl_buffer *index_buffer,
+ uint32_t index_buffer_offset,
+ mtl_buffer *indirect_buffer,
+ uint64_t indirect_buffer_offset)
+{
+}
+
+void
+mtl_render_use_resource(mtl_compute_encoder *encoder, mtl_resource *res_handle,
+ uint32_t usage)
+{
+}
+
+void
+mtl_render_use_resources(mtl_render_encoder *encoder,
+ mtl_resource **resource_handles, uint32_t count,
+ enum mtl_resource_usage usage)
+{
+}
+
+void
+mtl_render_use_heaps(mtl_render_encoder *encoder, mtl_heap **heaps,
+ uint32_t count)
+{
+}
diff --git a/src/kosmickrisp/bridge/stubs/mtl_heap.c b/src/kosmickrisp/bridge/stubs/mtl_heap.c
new file mode 100644
index 00000000000..de916c4d76c
--- /dev/null
+++ b/src/kosmickrisp/bridge/stubs/mtl_heap.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_heap.h"
+
+/* Creation */
+mtl_heap *
+mtl_new_heap(mtl_device *device, uint64_t size,
+ enum mtl_resource_options resource_options)
+{
+ return NULL;
+}
+
+/* Utils */
+uint64_t
+mtl_heap_get_size(mtl_heap *heap)
+{
+ return 0u;
+}
+
+/* Allocation from heap */
+mtl_buffer *
+mtl_new_buffer_with_length(mtl_heap *heap, uint64_t size_B, uint64_t offset_B)
+{
+ return NULL;
+}
+
+mtl_texture *
+mtl_new_texture_with_descriptor(mtl_heap *heap,
+ const struct kk_image_layout *layout,
+ uint64_t offset)
+{
+ return NULL;
+}
diff --git a/src/kosmickrisp/bridge/stubs/mtl_library.c b/src/kosmickrisp/bridge/stubs/mtl_library.c
new file mode 100644
index 00000000000..dcd41781e8b
--- /dev/null
+++ b/src/kosmickrisp/bridge/stubs/mtl_library.c
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_library.h"
+
+mtl_library *
+mtl_new_library(mtl_device *device, const char *src)
+{
+ return NULL;
+}
+
+mtl_function *
+mtl_new_function_with_name(mtl_library *lib, const char *entry_point)
+{
+ return NULL;
+}
diff --git a/src/kosmickrisp/bridge/stubs/mtl_render_state.c b/src/kosmickrisp/bridge/stubs/mtl_render_state.c
new file mode 100644
index 00000000000..e971c5e460f
--- /dev/null
+++ b/src/kosmickrisp/bridge/stubs/mtl_render_state.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_render_state.h"
+
+#include "mtl_format.h"
+
+/* TODO_KOSMICKRISP Remove */
+#include "vulkan/vulkan.h"
+
+/* Render pass descriptor */
+mtl_render_pass_descriptor *
+mtl_new_render_pass_descriptor(void)
+{
+ return NULL;
+}
+
+mtl_render_pass_attachment_descriptor *
+mtl_render_pass_descriptor_get_color_attachment(
+ mtl_render_pass_descriptor *descriptor, uint32_t index)
+{
+ return NULL;
+}
+
+mtl_render_pass_attachment_descriptor *
+mtl_render_pass_descriptor_get_depth_attachment(
+ mtl_render_pass_descriptor *descriptor)
+{
+ return NULL;
+}
+
+mtl_render_pass_attachment_descriptor *
+mtl_render_pass_descriptor_get_stencil_attachment(
+ mtl_render_pass_descriptor *descriptor)
+{
+ return NULL;
+}
+
+void
+mtl_render_pass_attachment_descriptor_set_texture(
+ mtl_render_pass_attachment_descriptor *descriptor, mtl_texture *texture)
+{
+}
+
+void
+mtl_render_pass_attachment_descriptor_set_level(
+ mtl_render_pass_attachment_descriptor *descriptor, uint32_t level)
+{
+}
+
+void
+mtl_render_pass_attachment_descriptor_set_slice(
+ mtl_render_pass_attachment_descriptor *descriptor, uint32_t slice)
+{
+}
+
+void
+mtl_render_pass_attachment_descriptor_set_load_action(
+ mtl_render_pass_attachment_descriptor *descriptor,
+ enum mtl_load_action action)
+{
+}
+
+void
+mtl_render_pass_attachment_descriptor_set_store_action(
+ mtl_render_pass_attachment_descriptor *descriptor,
+ enum mtl_store_action action)
+{
+}
+
+void
+mtl_render_pass_attachment_descriptor_set_clear_color(
+ mtl_render_pass_attachment_descriptor *descriptor,
+ struct mtl_clear_color clear_color)
+{
+}
+
+void
+mtl_render_pass_attachment_descriptor_set_clear_depth(
+ mtl_render_pass_attachment_descriptor *descriptor, double depth)
+{
+}
+
+void
+mtl_render_pass_attachment_descriptor_set_clear_stencil(
+ mtl_render_pass_attachment_descriptor *descriptor, uint32_t stencil)
+{
+}
+
+void
+mtl_render_pass_descriptor_set_render_target_array_length(
+ mtl_render_pass_descriptor *descriptor, uint32_t length)
+{
+}
+
+void
+mtl_render_pass_descriptor_set_render_target_width(
+ mtl_render_pass_descriptor *descriptor, uint32_t width)
+{
+}
+
+void
+mtl_render_pass_descriptor_set_render_target_height(
+ mtl_render_pass_descriptor *descriptor, uint32_t height)
+{
+}
+
+void
+mtl_render_pass_descriptor_set_default_raster_sample_count(
+ mtl_render_pass_descriptor *descriptor, uint32_t sample_count)
+{
+}
+
+void
+mtl_render_pass_descriptor_set_visibility_buffer(
+ mtl_render_pass_descriptor *descriptor, mtl_buffer *visibility_buffer)
+{
+}
+
+/* Render pipeline descriptor */
+mtl_render_pipeline_descriptor *
+mtl_new_render_pipeline_descriptor(void)
+{
+ return NULL;
+}
+
+void
+mtl_render_pipeline_descriptor_set_vertex_shader(
+ mtl_render_pass_descriptor *descriptor, mtl_function *shader)
+{
+}
+
+void
+mtl_render_pipeline_descriptor_set_fragment_shader(
+ mtl_render_pass_descriptor *descriptor, mtl_function *shader)
+{
+}
+
+void
+mtl_render_pipeline_descriptor_set_input_primitive_topology(
+ mtl_render_pass_descriptor *descriptor,
+ enum mtl_primitive_topology_class topology_class)
+{
+}
+
+void
+mtl_render_pipeline_descriptor_set_color_attachment_format(
+ mtl_render_pass_descriptor *descriptor, uint8_t index,
+ enum mtl_pixel_format format)
+{
+}
+
+void
+mtl_render_pipeline_descriptor_set_depth_attachment_format(
+ mtl_render_pass_descriptor *descriptor, enum mtl_pixel_format format)
+{
+}
+
+void
+mtl_render_pipeline_descriptor_set_stencil_attachment_format(
+ mtl_render_pass_descriptor *descriptor, enum mtl_pixel_format format)
+{
+}
+
+void
+mtl_render_pipeline_descriptor_set_raster_sample_count(
+ mtl_render_pass_descriptor *descriptor, uint32_t sample_count)
+{
+}
+
+void
+mtl_render_pipeline_descriptor_set_alpha_to_coverage(
+ mtl_render_pass_descriptor *descriptor, bool enabled)
+{
+}
+
+void
+mtl_render_pipeline_descriptor_set_alpha_to_one(
+ mtl_render_pass_descriptor *descriptor, bool enabled)
+{
+}
+
+void
+mtl_render_pipeline_descriptor_set_rasterization_enabled(
+ mtl_render_pass_descriptor *descriptor, bool enabled)
+{
+}
+
+void
+mtl_render_pipeline_descriptor_set_max_vertex_amplification_count(
+ mtl_render_pass_descriptor *descriptor, uint32_t count)
+{
+}
+
+/* Render pipeline */
+mtl_render_pipeline_state *
+mtl_new_render_pipeline(mtl_device *device,
+ mtl_render_pass_descriptor *descriptor)
+{
+ return NULL;
+}
+
+/* Stencil descriptor */
+mtl_stencil_descriptor *
+mtl_new_stencil_descriptor(void)
+{
+ return NULL;
+}
+
+void
+mtl_stencil_descriptor_set_stencil_failure_operation(
+ mtl_stencil_descriptor *descriptor, enum VkStencilOp op)
+{
+}
+
+void
+mtl_stencil_descriptor_set_depth_failure_operation(
+ mtl_stencil_descriptor *descriptor, enum VkStencilOp op)
+{
+}
+
+void
+mtl_stencil_descriptor_set_depth_stencil_pass_operation(
+ mtl_stencil_descriptor *descriptor, enum VkStencilOp op)
+{
+}
+
+void
+mtl_stencil_descriptor_set_stencil_compare_function(
+ mtl_stencil_descriptor *descriptor, enum VkCompareOp op)
+{
+}
+
+void
+mtl_stencil_descriptor_set_read_mask(mtl_stencil_descriptor *descriptor,
+ uint32_t mask)
+{
+}
+
+void
+mtl_stencil_descriptor_set_write_mask(mtl_stencil_descriptor *descriptor,
+ uint32_t mask)
+{
+}
+
+/* Depth stencil descriptor */
+mtl_depth_stencil_descriptor *
+mtl_new_depth_stencil_descriptor(void)
+{
+ return NULL;
+}
+
+void
+mtl_depth_stencil_descriptor_set_depth_compare_function(
+ mtl_depth_stencil_descriptor *descriptor, enum VkCompareOp op)
+{
+}
+
+void
+mtl_depth_stencil_descriptor_set_depth_write_enabled(
+ mtl_depth_stencil_descriptor *descriptor, bool enable_write)
+{
+}
+
+void
+mtl_depth_stencil_descriptor_set_back_face_stencil(
+ mtl_depth_stencil_descriptor *descriptor,
+ mtl_stencil_descriptor *stencil_descriptor)
+{
+}
+
+void
+mtl_depth_stencil_descriptor_set_front_face_stencil(
+ mtl_depth_stencil_descriptor *descriptor,
+ mtl_stencil_descriptor *stencil_descriptor)
+{
+}
+
+/* Depth stencil state */
+mtl_depth_stencil_state *
+mtl_new_depth_stencil_state(mtl_device *device,
+ mtl_depth_stencil_descriptor *descriptor)
+{
+ return NULL;
+}
diff --git a/src/kosmickrisp/bridge/stubs/mtl_sampler.c b/src/kosmickrisp/bridge/stubs/mtl_sampler.c
new file mode 100644
index 00000000000..a3530b38440
--- /dev/null
+++ b/src/kosmickrisp/bridge/stubs/mtl_sampler.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_sampler.h"
+
+/* Sampler descriptor */
+mtl_sampler_descriptor *
+mtl_new_sampler_descriptor(void)
+{
+ return NULL;
+}
+
+/* Sampler descriptor utils */
+void
+mtl_sampler_descriptor_set_normalized_coordinates(
+ mtl_sampler_descriptor *descriptor, bool normalized_coordinates)
+{
+}
+
+void
+mtl_sampler_descriptor_set_address_mode(
+ mtl_sampler_descriptor *descriptor,
+ enum mtl_sampler_address_mode address_mode_u,
+ enum mtl_sampler_address_mode address_mode_v,
+ enum mtl_sampler_address_mode address_mode_w)
+{
+}
+
+void
+mtl_sampler_descriptor_set_border_color(mtl_sampler_descriptor *descriptor,
+ enum mtl_sampler_border_color color)
+{
+}
+void
+mtl_sampler_descriptor_set_filters(mtl_sampler_descriptor *descriptor,
+ enum mtl_sampler_min_mag_filter min_filter,
+ enum mtl_sampler_min_mag_filter mag_filter,
+ enum mtl_sampler_mip_filter mip_filter)
+{
+}
+
+void
+mtl_sampler_descriptor_set_lod_clamp(mtl_sampler_descriptor *descriptor,
+ float min, float max)
+{
+}
+
+void
+mtl_sampler_descriptor_set_max_anisotropy(mtl_sampler_descriptor *descriptor,
+ uint64_t max)
+{
+}
+void
+mtl_sampler_descriptor_set_compare_function(mtl_sampler_descriptor *descriptor,
+ enum mtl_compare_function function)
+{
+}
+
+/* Sampler */
+mtl_sampler *
+mtl_new_sampler(mtl_device *device, mtl_sampler_descriptor *descriptor)
+{
+ return NULL;
+}
+
+/* Sampler utils */
+uint64_t
+mtl_sampler_get_gpu_resource_id(mtl_sampler *sampler)
+{
+ return 0u;
+}
diff --git a/src/kosmickrisp/bridge/stubs/mtl_sync.c b/src/kosmickrisp/bridge/stubs/mtl_sync.c
new file mode 100644
index 00000000000..0d4e1a8512e
--- /dev/null
+++ b/src/kosmickrisp/bridge/stubs/mtl_sync.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_sync.h"
+
+/* MTLFence */
+mtl_fence *
+mtl_new_fence(mtl_device *device)
+{
+ return NULL;
+}
+
+/* MTLEvent */
+mtl_event *
+mtl_new_event(mtl_device *device)
+{
+ return NULL;
+}
+
+/* MTLSharedEvent */
+mtl_shared_event *
+mtl_new_shared_event(mtl_device *device)
+{
+ return NULL;
+}
+
+int
+mtl_shared_event_wait_until_signaled_value(mtl_shared_event *event_handle,
+ uint64_t value, uint64_t timeout_ms)
+{
+ return 0;
+}
+
+uint64_t
+mtl_shared_event_get_signaled_value(mtl_shared_event *event_handle)
+{
+ return 0u;
+}
+
+void
+mtl_shared_event_set_signaled_value(mtl_shared_event *event_handle,
+ uint64_t value)
+{
+}
diff --git a/src/kosmickrisp/bridge/stubs/mtl_texture.c b/src/kosmickrisp/bridge/stubs/mtl_texture.c
new file mode 100644
index 00000000000..8ed3b22ec95
--- /dev/null
+++ b/src/kosmickrisp/bridge/stubs/mtl_texture.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_texture.h"
+
+/* Utils */
+uint64_t
+mtl_texture_get_gpu_resource_id(mtl_texture *texture)
+{
+ return 0u;
+}
+
+/* Texture view creation */
+mtl_texture *
+mtl_new_texture_view_with(mtl_texture *texture,
+ const struct kk_view_layout *layout)
+{
+ return NULL;
+}
+
+mtl_texture *
+mtl_new_texture_view_with_no_swizzle(mtl_texture *texture,
+ const struct kk_view_layout *layout)
+{
+ return NULL;
+}
diff --git a/src/kosmickrisp/bridge/vk_to_mtl_map.c b/src/kosmickrisp/bridge/vk_to_mtl_map.c
new file mode 100644
index 00000000000..ef586dc09a0
--- /dev/null
+++ b/src/kosmickrisp/bridge/vk_to_mtl_map.c
@@ -0,0 +1,251 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "vk_to_mtl_map.h"
+
+#include "kosmickrisp/bridge/mtl_types.h"
+
+#include "util/format/u_format.h"
+
+#include "vulkan/vulkan.h"
+#include "vk_meta.h"
+
+struct mtl_origin
+vk_offset_3d_to_mtl_origin(const struct VkOffset3D *offset)
+{
+ struct mtl_origin ret = {
+ .x = offset->x,
+ .y = offset->y,
+ .z = offset->z,
+ };
+ return ret;
+}
+
+struct mtl_size
+vk_extent_3d_to_mtl_size(const struct VkExtent3D *extent)
+{
+ struct mtl_size ret = {
+ .x = extent->width,
+ .y = extent->height,
+ .z = extent->depth,
+ };
+ return ret;
+}
+
+enum mtl_primitive_type
+vk_primitive_topology_to_mtl_primitive_type(enum VkPrimitiveTopology topology)
+{
+ switch (topology) {
+ case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+ return MTL_PRIMITIVE_TYPE_POINT;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+ return MTL_PRIMITIVE_TYPE_LINE;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+ return MTL_PRIMITIVE_TYPE_LINE_STRIP;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
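+ /* META_RECT_LIST_MESA is a Mesa-internal topology value that is not part of
+ * the core VkPrimitiveTopology enum, hence the -Wswitch suppression below. */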
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wswitch"
+ case VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA:
+#pragma GCC diagnostic pop
+ /* Triangle fans are emulated, meaning we'll translate the index buffer to a
+ * triangle list or generate an index buffer if there is none. */
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+ return MTL_PRIMITIVE_TYPE_TRIANGLE;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+ return MTL_PRIMITIVE_TYPE_TRIANGLE_STRIP;
+ default:
+ assert(0 && "Primitive topology not supported!");
+ return 0;
+ }
+}
+
+enum mtl_primitive_topology_class
+vk_primitive_topology_to_mtl_primitive_topology_class(
+ enum VkPrimitiveTopology topology)
+{
+ switch (topology) {
+ case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+ return MTL_PRIMITIVE_TOPOLOGY_CLASS_POINT;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+ return MTL_PRIMITIVE_TOPOLOGY_CLASS_LINE;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wswitch"
+ case VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA:
+#pragma GCC diagnostic pop
+ return MTL_PRIMITIVE_TOPOLOGY_CLASS_TRIANGLE;
+ default:
+ return MTL_PRIMITIVE_TOPOLOGY_CLASS_UNSPECIFIED;
+ }
+}
+
+enum mtl_load_action
+vk_attachment_load_op_to_mtl_load_action(enum VkAttachmentLoadOp op)
+{
+ switch (op) {
+ case VK_ATTACHMENT_LOAD_OP_LOAD:
+ return MTL_LOAD_ACTION_LOAD;
+ case VK_ATTACHMENT_LOAD_OP_CLEAR:
+ return MTL_LOAD_ACTION_CLEAR;
+ case VK_ATTACHMENT_LOAD_OP_DONT_CARE:
+ return MTL_LOAD_ACTION_DONT_CARE;
+ default:
+ assert(false && "Unsupported VkAttachmentLoadOp");
+ return MTL_LOAD_ACTION_DONT_CARE;
+ };
+}
+
+enum mtl_store_action
+vk_attachment_store_op_to_mtl_store_action(enum VkAttachmentStoreOp op)
+{
+ switch (op) {
+ case VK_ATTACHMENT_STORE_OP_STORE:
+ return MTL_STORE_ACTION_STORE;
+ case VK_ATTACHMENT_STORE_OP_DONT_CARE:
+ return MTL_STORE_ACTION_DONT_CARE;
+ case VK_ATTACHMENT_STORE_OP_NONE:
+ return MTL_STORE_ACTION_UNKNOWN;
+ default:
+ assert(false && "Unsupported VkAttachmentStoreOp");
+ return MTL_STORE_ACTION_UNKNOWN;
+ };
+}
+
+enum mtl_sampler_address_mode
+vk_sampler_address_mode_to_mtl_sampler_address_mode(
+ enum VkSamplerAddressMode mode)
+{
+ switch (mode) {
+ case VK_SAMPLER_ADDRESS_MODE_REPEAT:
+ return MTL_SAMPLER_ADDRESS_MODE_REPEAT;
+ case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
+ return MTL_SAMPLER_ADDRESS_MODE_MIRROR_REPEAT;
+ case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
+ return MTL_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
+ case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
+ return MTL_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER_COLOR;
+ case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
+ return MTL_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
+ default:
+ UNREACHABLE("Unsupported address mode");
+ }
+}
+
+enum mtl_sampler_border_color
+vk_border_color_to_mtl_sampler_border_color(enum VkBorderColor color)
+{
+ switch (color) {
+ case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
+ case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
+ return MTL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK;
+ case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
+ case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
+ return MTL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK;
+ case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
+ case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
+ return MTL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE;
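+ /* Metal only has transparent-black, opaque-black and opaque-white border
+ * colors, so approximate custom border colors with opaque white. */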
+ case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
+ case VK_BORDER_COLOR_INT_CUSTOM_EXT:
+ return MTL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE;
+ default:
+ UNREACHABLE("Unsupported border color");
+ }
+}
+
+enum mtl_sampler_min_mag_filter
+vk_filter_to_mtl_sampler_min_mag_filter(enum VkFilter filter)
+{
+ switch (filter) {
+ case VK_FILTER_NEAREST:
+ return MTL_SAMPLER_MIN_MAG_FILTER_NEAREST;
+ case VK_FILTER_LINEAR:
+ return MTL_SAMPLER_MIN_MAG_FILTER_LINEAR;
+ default:
+ UNREACHABLE("Unsupported filter");
+ }
+}
+
+enum mtl_sampler_mip_filter
+vk_sampler_mipmap_mode_to_mtl_sampler_mip_filter(enum VkSamplerMipmapMode mode)
+{
+ switch (mode) {
+ case VK_SAMPLER_MIPMAP_MODE_NEAREST:
+ return MTL_SAMPLER_MIP_FILTER_NEAREST;
+ case VK_SAMPLER_MIPMAP_MODE_LINEAR:
+ return MTL_SAMPLER_MIP_FILTER_LINEAR;
+ default:
+ UNREACHABLE("Unsupported mipmap mode");
+ }
+}
+
+enum mtl_compare_function
+vk_compare_op_to_mtl_compare_function(enum VkCompareOp op)
+{
+ switch (op) {
+ case VK_COMPARE_OP_NEVER:
+ return MTL_COMPARE_FUNCTION_NEVER;
+ case VK_COMPARE_OP_LESS:
+ return MTL_COMPARE_FUNCTION_LESS;
+ case VK_COMPARE_OP_EQUAL:
+ return MTL_COMPARE_FUNCTION_EQUAL;
+ case VK_COMPARE_OP_LESS_OR_EQUAL:
+ return MTL_COMPARE_FUNCTION_LESS_EQUAL;
+ case VK_COMPARE_OP_GREATER:
+ return MTL_COMPARE_FUNCTION_GREATER;
+ case VK_COMPARE_OP_NOT_EQUAL:
+ return MTL_COMPARE_FUNCTION_NOT_EQUAL;
+ case VK_COMPARE_OP_GREATER_OR_EQUAL:
+ return MTL_COMPARE_FUNCTION_GREATER_EQUAL;
+ case VK_COMPARE_OP_ALWAYS:
+ return MTL_COMPARE_FUNCTION_ALWAYS;
+ default:
+ UNREACHABLE("Unsupported compare op");
+ }
+}
+
+enum mtl_winding
+vk_front_face_to_mtl_winding(enum VkFrontFace face)
+{
+ switch (face) {
+ case VK_FRONT_FACE_CLOCKWISE:
+ return MTL_WINDING_CLOCKWISE;
+ case VK_FRONT_FACE_COUNTER_CLOCKWISE:
+ return MTL_WINDING_COUNTER_CLOCKWISE;
+ default:
+ assert(false && "Unsupported VkFrontFace");
+ return MTL_WINDING_CLOCKWISE;
+ }
+}
+
+enum mtl_cull_mode
+vk_front_face_to_mtl_cull_mode(enum VkCullModeFlagBits mode)
+{
+ switch (mode) {
+ case VK_CULL_MODE_NONE:
+ return MTL_CULL_MODE_NONE;
+ case VK_CULL_MODE_FRONT_BIT:
+ return MTL_CULL_MODE_FRONT;
+ case VK_CULL_MODE_BACK_BIT:
+ return MTL_CULL_MODE_BACK;
+ default:
+ UNREACHABLE("Unsupported VkCullModeFlags");
+ }
+}
+
+enum mtl_index_type
+index_size_in_bytes_to_mtl_index_type(unsigned bytes)
+{
+ switch (bytes) {
+ case 2u:
+ return MTL_INDEX_TYPE_UINT16;
+ case 4u:
+ return MTL_INDEX_TYPE_UINT32;
+ default:
+ UNREACHABLE("Unsupported byte size for index");
+ }
+}
diff --git a/src/kosmickrisp/bridge/vk_to_mtl_map.h b/src/kosmickrisp/bridge/vk_to_mtl_map.h
new file mode 100644
index 00000000000..23e5506d9a6
--- /dev/null
+++ b/src/kosmickrisp/bridge/vk_to_mtl_map.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef KK_VK_TO_MTL_MAP_H
+#define KK_VK_TO_MTL_MAP_H 1
+
+enum pipe_format;
+
+struct mtl_origin;
+struct mtl_size;
+enum mtl_primitive_type;
+enum mtl_primitive_topology_class;
+enum mtl_load_action;
+enum mtl_store_action;
+enum mtl_sampler_address_mode;
+enum mtl_sampler_border_color;
+enum mtl_sampler_min_mag_filter;
+enum mtl_sampler_mip_filter;
+enum mtl_compare_function;
+enum mtl_winding;
+enum mtl_cull_mode;
+enum mtl_index_type;
+
+struct VkOffset3D;
+struct VkExtent3D;
+union VkClearColorValue;
+enum VkPrimitiveTopology;
+enum VkAttachmentLoadOp;
+enum VkAttachmentStoreOp;
+enum VkSamplerAddressMode;
+enum VkBorderColor;
+enum VkFilter;
+enum VkSamplerMipmapMode;
+enum VkCompareOp;
+enum VkFrontFace;
+enum VkCullModeFlagBits;
+
+/* STRUCTS */
+struct mtl_origin vk_offset_3d_to_mtl_origin(const struct VkOffset3D *offset);
+
+struct mtl_size vk_extent_3d_to_mtl_size(const struct VkExtent3D *extent);
+
+/* ENUMS */
+enum mtl_primitive_type
+vk_primitive_topology_to_mtl_primitive_type(enum VkPrimitiveTopology topology);
+
+enum mtl_primitive_topology_class
+vk_primitive_topology_to_mtl_primitive_topology_class(
+ enum VkPrimitiveTopology topology);
+
+enum mtl_load_action
+vk_attachment_load_op_to_mtl_load_action(enum VkAttachmentLoadOp op);
+
+enum mtl_store_action
+vk_attachment_store_op_to_mtl_store_action(enum VkAttachmentStoreOp op);
+
+enum mtl_sampler_address_mode
+vk_sampler_address_mode_to_mtl_sampler_address_mode(
+ enum VkSamplerAddressMode mode);
+
+enum mtl_sampler_border_color
+vk_border_color_to_mtl_sampler_border_color(enum VkBorderColor color);
+
+enum mtl_sampler_min_mag_filter
+vk_filter_to_mtl_sampler_min_mag_filter(enum VkFilter filter);
+
+enum mtl_sampler_mip_filter
+vk_sampler_mipmap_mode_to_mtl_sampler_mip_filter(enum VkSamplerMipmapMode mode);
+
+enum mtl_compare_function
+vk_compare_op_to_mtl_compare_function(enum VkCompareOp op);
+
+enum mtl_winding vk_front_face_to_mtl_winding(enum VkFrontFace face);
+
+enum mtl_cull_mode vk_front_face_to_mtl_cull_mode(enum VkCullModeFlagBits mode);
+
+enum mtl_index_type index_size_in_bytes_to_mtl_index_type(unsigned bytes);
+
+#endif /* KK_VK_TO_MTL_MAP_H */
diff --git a/src/kosmickrisp/compiler/meson.build b/src/kosmickrisp/compiler/meson.build
new file mode 100644
index 00000000000..34fe22245d2
--- /dev/null
+++ b/src/kosmickrisp/compiler/meson.build
@@ -0,0 +1,35 @@
+# Copyright 2025 LunarG, Inc.
+# Copyright 2025 Google LLC
+# SPDX-License-Identifier: MIT
+
+libmsl_compiler_files = files(
+ 'nir_to_msl.c',
+ 'msl_type_inference.c',
+ 'msl_iomap.c',
+ 'msl_nir_lower_common.c',
+ 'msl_nir_lower_subgroups.c',
+)
+
+msl_nir_algebraic_c = custom_target(
+ input : 'msl_nir_algebraic.py',
+ output : 'msl_nir_algebraic.c',
+ command : [
+ prog_python, '@INPUT@', '-p', dir_compiler_nir,
+ ],
+ capture : true,
+ depend_files : nir_algebraic_depends,
+)
+
+
+libmsl_compiler = static_library(
+ 'msl_compiler',
+ [libmsl_compiler_files, msl_nir_algebraic_c],
+ dependencies : [idep_nir, idep_mesautil],
+ gnu_symbol_visibility: 'hidden',
+ build_by_default: false,
+)
+
+idep_msl_to_nir = declare_dependency(
+ link_with : libmsl_compiler,
+)
+
diff --git a/src/kosmickrisp/compiler/msl_iomap.c b/src/kosmickrisp/compiler/msl_iomap.c
new file mode 100644
index 00000000000..aed3f012d29
--- /dev/null
+++ b/src/kosmickrisp/compiler/msl_iomap.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+/* This file primarily concerns itself with mapping from the NIR (and Vulkan)
+ * model of I/O to the Metal one. */
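+/* As an illustration, a vertex shader writing gl_Position plus one user
+ * varying ends up with an output struct roughly like:
+ *
+ *    struct VertexOut {
+ *       float4 position [[position]];
+ *       float4 vary_00 [[user(vary_00)]];
+ *    };
+ *
+ * (illustrative only; the member types follow the tables below). */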
+
+#include "msl_private.h"
+
+#include "nir_builder.h"
+
+/* Mapping from alu type to Metal scalar type */
+static const char *
+alu_type_to_string(nir_alu_type type)
+{
+ switch (type) {
+ case nir_type_uint8:
+ return "uchar";
+ case nir_type_uint16:
+ return "ushort";
+ case nir_type_uint32:
+ return "uint";
+ case nir_type_uint64:
+ return "ulong";
+ case nir_type_int8:
+ return "char";
+ case nir_type_int16:
+ return "short";
+ case nir_type_int32:
+ return "int";
+ case nir_type_int64:
+ return "long";
+ case nir_type_float16:
+ return "half";
+ case nir_type_float32:
+ return "float";
+ case nir_type_bool8:
+ return "bool";
+ default:
+ UNREACHABLE("Unsupported nir_alu_type");
+ }
+}
+
+/* Type suffix for a vector of a given size. */
+static const char *vector_suffixes[] = {
+ [1] = "",
+ [2] = "2",
+ [3] = "3",
+ [4] = "4",
+};
+
+/* The type names of the generated output structs */
+static const char *VERTEX_OUTPUT_TYPE = "VertexOut";
+static const char *FRAGMENT_OUTPUT_TYPE = "FragmentOut";
+
+/* Mapping from NIR's varying slots to the generated struct member name */
+static const char *VARYING_SLOT_NAME[NUM_TOTAL_VARYING_SLOTS] = {
+ [VARYING_SLOT_POS] = "position",
+ [VARYING_SLOT_PSIZ] = "point_size",
+ [VARYING_SLOT_PRIMITIVE_ID] = "primitive_id",
+ [VARYING_SLOT_LAYER] = "layer",
+ [VARYING_SLOT_VAR0] = "vary_00",
+ [VARYING_SLOT_VAR1] = "vary_01",
+ [VARYING_SLOT_VAR2] = "vary_02",
+ [VARYING_SLOT_VAR3] = "vary_03",
+ [VARYING_SLOT_VAR4] = "vary_04",
+ [VARYING_SLOT_VAR5] = "vary_05",
+ [VARYING_SLOT_VAR6] = "vary_06",
+ [VARYING_SLOT_VAR7] = "vary_07",
+ [VARYING_SLOT_VAR8] = "vary_08",
+ [VARYING_SLOT_VAR9] = "vary_09",
+ [VARYING_SLOT_VAR10] = "vary_10",
+ [VARYING_SLOT_VAR11] = "vary_11",
+ [VARYING_SLOT_VAR12] = "vary_12",
+ [VARYING_SLOT_VAR13] = "vary_13",
+ [VARYING_SLOT_VAR14] = "vary_14",
+ [VARYING_SLOT_VAR15] = "vary_15",
+ [VARYING_SLOT_VAR16] = "vary_16",
+ [VARYING_SLOT_VAR17] = "vary_17",
+ [VARYING_SLOT_VAR18] = "vary_18",
+ [VARYING_SLOT_VAR19] = "vary_19",
+ [VARYING_SLOT_VAR20] = "vary_20",
+ [VARYING_SLOT_VAR21] = "vary_21",
+ [VARYING_SLOT_VAR22] = "vary_22",
+ [VARYING_SLOT_VAR23] = "vary_23",
+ [VARYING_SLOT_VAR24] = "vary_24",
+ [VARYING_SLOT_VAR25] = "vary_25",
+ [VARYING_SLOT_VAR26] = "vary_26",
+ [VARYING_SLOT_VAR27] = "vary_27",
+ [VARYING_SLOT_VAR28] = "vary_28",
+ [VARYING_SLOT_VAR29] = "vary_29",
+ [VARYING_SLOT_VAR30] = "vary_30",
+ [VARYING_SLOT_VAR31] = "vary_31",
+};
+
+/* Mapping from NIR varying slot to the MSL struct member attribute. */
+static const char *VARYING_SLOT_SEMANTIC[NUM_TOTAL_VARYING_SLOTS] = {
+ [VARYING_SLOT_POS] = "[[position]]",
+ [VARYING_SLOT_PSIZ] = "[[point_size]]",
+ [VARYING_SLOT_PRIMITIVE_ID] = "[[primitive_id]]",
+ [VARYING_SLOT_LAYER] = "[[render_target_array_index]]",
+ [VARYING_SLOT_VAR0] = "[[user(vary_00)]]",
+ [VARYING_SLOT_VAR1] = "[[user(vary_01)]]",
+ [VARYING_SLOT_VAR2] = "[[user(vary_02)]]",
+ [VARYING_SLOT_VAR3] = "[[user(vary_03)]]",
+ [VARYING_SLOT_VAR4] = "[[user(vary_04)]]",
+ [VARYING_SLOT_VAR5] = "[[user(vary_05)]]",
+ [VARYING_SLOT_VAR6] = "[[user(vary_06)]]",
+ [VARYING_SLOT_VAR7] = "[[user(vary_07)]]",
+ [VARYING_SLOT_VAR8] = "[[user(vary_08)]]",
+ [VARYING_SLOT_VAR9] = "[[user(vary_09)]]",
+ [VARYING_SLOT_VAR10] = "[[user(vary_10)]]",
+ [VARYING_SLOT_VAR11] = "[[user(vary_11)]]",
+ [VARYING_SLOT_VAR12] = "[[user(vary_12)]]",
+ [VARYING_SLOT_VAR13] = "[[user(vary_13)]]",
+ [VARYING_SLOT_VAR14] = "[[user(vary_14)]]",
+ [VARYING_SLOT_VAR15] = "[[user(vary_15)]]",
+ [VARYING_SLOT_VAR16] = "[[user(vary_16)]]",
+ [VARYING_SLOT_VAR17] = "[[user(vary_17)]]",
+ [VARYING_SLOT_VAR18] = "[[user(vary_18)]]",
+ [VARYING_SLOT_VAR19] = "[[user(vary_19)]]",
+ [VARYING_SLOT_VAR20] = "[[user(vary_20)]]",
+ [VARYING_SLOT_VAR21] = "[[user(vary_21)]]",
+ [VARYING_SLOT_VAR22] = "[[user(vary_22)]]",
+ [VARYING_SLOT_VAR23] = "[[user(vary_23)]]",
+ [VARYING_SLOT_VAR24] = "[[user(vary_24)]]",
+ [VARYING_SLOT_VAR25] = "[[user(vary_25)]]",
+ [VARYING_SLOT_VAR26] = "[[user(vary_26)]]",
+ [VARYING_SLOT_VAR27] = "[[user(vary_27)]]",
+ [VARYING_SLOT_VAR28] = "[[user(vary_28)]]",
+ [VARYING_SLOT_VAR29] = "[[user(vary_29)]]",
+ [VARYING_SLOT_VAR30] = "[[user(vary_30)]]",
+ [VARYING_SLOT_VAR31] = "[[user(vary_31)]]",
+};
+
+/* Mapping from NIR fragment output slot to MSL struct member name */
+static const char *FS_OUTPUT_NAME[] = {
+ [FRAG_RESULT_DEPTH] = "depth_out",
+ [FRAG_RESULT_STENCIL] = "stencil_out",
+ [FRAG_RESULT_SAMPLE_MASK] = "sample_mask_out",
+ [FRAG_RESULT_DATA0] = "color_0",
+ [FRAG_RESULT_DATA1] = "color_1",
+ [FRAG_RESULT_DATA2] = "color_2",
+ [FRAG_RESULT_DATA3] = "color_3",
+ [FRAG_RESULT_DATA4] = "color_4",
+ [FRAG_RESULT_DATA5] = "color_5",
+ [FRAG_RESULT_DATA6] = "color_6",
+ [FRAG_RESULT_DATA7] = "color_7",
+};
+
+/* Mapping from NIR fragment output slot to MSL struct member attribute */
+static const char *FS_OUTPUT_SEMANTIC[] = {
+ [FRAG_RESULT_DEPTH] = "", // special case, depends on depth layout
+ [FRAG_RESULT_STENCIL] = "stencil", [FRAG_RESULT_SAMPLE_MASK] = "sample_mask",
+ [FRAG_RESULT_DATA0] = "color(0)", [FRAG_RESULT_DATA1] = "color(1)",
+ [FRAG_RESULT_DATA2] = "color(2)", [FRAG_RESULT_DATA3] = "color(3)",
+ [FRAG_RESULT_DATA4] = "color(4)", [FRAG_RESULT_DATA5] = "color(5)",
+ [FRAG_RESULT_DATA6] = "color(6)", [FRAG_RESULT_DATA7] = "color(7)",
+};
+
+const char *depth_layout_arg[8] = {
+ [FRAG_DEPTH_LAYOUT_ANY] = "any",
+ [FRAG_DEPTH_LAYOUT_GREATER] = "greater",
+ [FRAG_DEPTH_LAYOUT_LESS] = "less",
+ [FRAG_DEPTH_LAYOUT_UNCHANGED] = "any",
+};
+
+/* Generate the struct definition for the vertex shader return value */
+static void
+vs_output_block(nir_shader *shader, struct nir_to_msl_ctx *ctx)
+{
+ P(ctx, "struct %s {\n", VERTEX_OUTPUT_TYPE);
+ ctx->indentlevel++;
+ u_foreach_bit64(location, shader->info.outputs_written) {
+ struct io_slot_info info = ctx->outputs_info[location];
+ const char *type = alu_type_to_string(info.type);
+ const char *vector_suffix = vector_suffixes[info.num_components];
+ P_IND(ctx, "%s%s %s %s;\n", type, vector_suffix,
+ VARYING_SLOT_NAME[location], VARYING_SLOT_SEMANTIC[location]);
+ }
+
+ ctx->indentlevel--;
+ P(ctx, "};\n");
+}
+
+/* Generate the struct definition for the fragment shader input argument */
+static void
+fs_input_block(nir_shader *shader, struct nir_to_msl_ctx *ctx)
+{
+ P(ctx, "struct FragmentIn {\n");
+ ctx->indentlevel++;
+ u_foreach_bit64(location, shader->info.inputs_read) {
+ struct io_slot_info info = ctx->inputs_info[location];
+ const char *type = alu_type_to_string(info.type);
+ const char *vector_suffix = vector_suffixes[info.num_components];
+ const char *interp = "";
+ switch (info.interpolation) {
+ case INTERP_MODE_NOPERSPECTIVE:
+ if (info.centroid)
+ interp = "[[centroid_no_perspective]]";
+ else if (info.sample)
+ interp = "[[sample_no_perspective]]";
+ else
+ interp = "[[center_no_perspective]]";
+ break;
+ case INTERP_MODE_FLAT:
+ interp = "[[flat]]";
+ break;
+ default:
+ if (info.centroid)
+ interp = "[[centroid_perspective]]";
+ else if (info.sample)
+ interp = "[[sample_perspective]]";
+ break;
+ }
+ P_IND(ctx, "%s%s %s %s %s;\n", type, vector_suffix,
+ VARYING_SLOT_NAME[location], VARYING_SLOT_SEMANTIC[location],
+ interp);
+ }
+
+ /* Enable reading from framebuffer */
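+ /* raster_order_group(0) keeps these reads ordered against earlier
+ * fragments' writes to the same pixel. */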
+ u_foreach_bit64(location, shader->info.outputs_read) {
+ struct io_slot_info info = ctx->outputs_info[location];
+ const char *type = alu_type_to_string(info.type);
+ const char *vector_suffix = vector_suffixes[info.num_components];
+ P_IND(ctx, "%s%s ", type, vector_suffix);
+ P(ctx, "%s [[%s, raster_order_group(0)]];\n", FS_OUTPUT_NAME[location],
+ FS_OUTPUT_SEMANTIC[location]);
+ }
+
+ ctx->indentlevel--;
+ P(ctx, "};\n");
+}
+
+/* Generate the struct definition for the fragment shader return value */
+static void
+fs_output_block(nir_shader *shader, struct nir_to_msl_ctx *ctx)
+{
+ P_IND(ctx, "struct %s {\n", FRAGMENT_OUTPUT_TYPE);
+ ctx->indentlevel++;
+ u_foreach_bit64(location, shader->info.outputs_written) {
+ struct io_slot_info info = ctx->outputs_info[location];
+ const char *type = alu_type_to_string(info.type);
+ const char *vector_suffix = vector_suffixes[info.num_components];
+ P_IND(ctx, "%s%s ", type, vector_suffix);
+ if (location == FRAG_RESULT_DEPTH) {
+ enum gl_frag_depth_layout depth_layout = shader->info.fs.depth_layout;
+ assert(depth_layout_arg[depth_layout]);
+ P(ctx, "%s [[depth(%s)]];\n", FS_OUTPUT_NAME[location],
+ depth_layout_arg[depth_layout]);
+ } else {
+ P(ctx, "%s [[%s]];\n", FS_OUTPUT_NAME[location],
+ FS_OUTPUT_SEMANTIC[location]);
+ }
+ }
+ ctx->indentlevel--;
+ P_IND(ctx, "};\n")
+}
+
+struct gather_ctx {
+ struct io_slot_info *input;
+ struct io_slot_info *output;
+};
+
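+/* Record, per I/O location, the ALU type, component count and interpolation
+ * so the struct emitters above can declare matching MSL members. */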
+static bool
+msl_nir_gather_io_info(nir_builder *b, nir_intrinsic_instr *intrin, void *data)
+{
+ struct gather_ctx *ctx = (struct gather_ctx *)data;
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_interpolated_input: {
+ unsigned component = nir_intrinsic_component(intrin);
+ struct nir_io_semantics io = nir_intrinsic_io_semantics(intrin);
+ assert(io.num_slots == 1u && "We don't support arrays");
+
+ unsigned location = nir_src_as_uint(intrin->src[1u]) + io.location;
+ ctx->input[location].type = nir_intrinsic_dest_type(intrin);
+ ctx->input[location].num_components =
+ MAX2(ctx->input[location].num_components,
+ intrin->num_components + component);
+ assert(ctx->input[location].num_components <= 4u &&
+ "Cannot have more than a vec4");
+
+ nir_intrinsic_instr *interp_intrin =
+ nir_src_as_intrinsic(intrin->src[0u]);
+ ctx->input[location].interpolation =
+ nir_intrinsic_interp_mode(interp_intrin);
+ ctx->input[location].centroid =
+ interp_intrin->intrinsic == nir_intrinsic_load_barycentric_centroid;
+ ctx->input[location].sample =
+ interp_intrin->intrinsic == nir_intrinsic_load_barycentric_sample;
+ break;
+ }
+ case nir_intrinsic_load_input: {
+ unsigned component = nir_intrinsic_component(intrin);
+ struct nir_io_semantics io = nir_intrinsic_io_semantics(intrin);
+ assert(io.num_slots == 1u && "We don't support arrays");
+
+ unsigned location = nir_src_as_uint(intrin->src[0u]) + io.location;
+ ctx->input[location].type = nir_intrinsic_dest_type(intrin);
+ ctx->input[location].interpolation = INTERP_MODE_FLAT;
+ ctx->input[location].num_components =
+ MAX2(ctx->input[location].num_components,
+ intrin->num_components + component);
+ assert(ctx->input[location].num_components <= 4u &&
+ "Cannot have more than a vec4");
+ break;
+ }
+ case nir_intrinsic_load_output: {
+ unsigned component = nir_intrinsic_component(intrin);
+ struct nir_io_semantics io = nir_intrinsic_io_semantics(intrin);
+ assert(io.num_slots == 1u && "We don't support arrays");
+
+ unsigned location = nir_src_as_uint(intrin->src[0u]) + io.location;
+ ctx->output[location].type = nir_intrinsic_dest_type(intrin);
+ ctx->output[location].num_components =
+ MAX2(ctx->output[location].num_components,
+ intrin->num_components + component);
+ assert(ctx->output[location].num_components <= 4u &&
+ "Cannot have more than a vec4");
+ break;
+ }
+ case nir_intrinsic_store_output: {
+ unsigned component = nir_intrinsic_component(intrin);
+ unsigned write_mask = nir_intrinsic_write_mask(intrin);
+ struct nir_io_semantics io = nir_intrinsic_io_semantics(intrin);
+ assert(io.num_slots == 1u && "We don't support arrays");
+
+ /* nir_lower_blend may emit stores whose num_components doesn't match the
+ * destination, so derive this store's component count from the component
+ * offset and the write mask (e.g. component 2 with a single-bit write mask
+ * still needs three components). */
+ unsigned num_components = component + 1u;
+ unsigned mask_left_most_index = 0u;
+ for (unsigned i = 0u; i < intrin->num_components; ++i) {
+ if ((write_mask >> i) & 1u)
+ mask_left_most_index = i;
+ }
+ num_components += mask_left_most_index;
+ unsigned location = nir_src_as_uint(intrin->src[1u]) + io.location;
+ ctx->output[location].type = nir_intrinsic_src_type(intrin);
+ ctx->output[location].num_components =
+ MAX3(ctx->output[location].num_components, num_components,
+ intrin->num_components);
+ assert(ctx->output[location].num_components <= 4u &&
+ "Cannot have more than a vec4");
+ break;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
+
+void
+msl_gather_io_info(struct nir_to_msl_ctx *ctx,
+ struct io_slot_info *info_array_input,
+ struct io_slot_info *info_array_output)
+{
+ struct gather_ctx gather_ctx = {
+ .input = info_array_input,
+ .output = info_array_output,
+ };
+ nir_shader_intrinsics_pass(ctx->shader, msl_nir_gather_io_info,
+ nir_metadata_all, &gather_ctx);
+}
+
+/* Generate all the struct definitions needed for shader I/O */
+void
+msl_emit_io_blocks(struct nir_to_msl_ctx *ctx, nir_shader *shader)
+{
+ switch (ctx->shader->info.stage) {
+ case MESA_SHADER_VERTEX:
+ vs_output_block(shader, ctx);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ fs_input_block(shader, ctx);
+ fs_output_block(shader, ctx);
+ break;
+ case MESA_SHADER_COMPUTE:
+ break;
+ default:
+ assert(0);
+ }
+ // TODO_KOSMICKRISP This should not exist. We should create the input
+ // structs in NIR and translate them later.
+ P(ctx, "struct Buffer {\n");
+ ctx->indentlevel++;
+ P_IND(ctx, "uint64_t contents[1];\n"); // TODO_KOSMICKRISP This should not be
+ // a cpu pointer
+ ctx->indentlevel--;
+ P(ctx, "};\n")
+
+ P(ctx, "struct SamplerTable {\n");
+ ctx->indentlevel++;
+ P_IND(ctx, "sampler handles[1024];\n");
+ ctx->indentlevel--;
+ P(ctx, "};\n")
+}
+
+void
+msl_emit_output_var(struct nir_to_msl_ctx *ctx, nir_shader *shader)
+{
+ switch (shader->info.stage) {
+ case MESA_SHADER_VERTEX:
+ P_IND(ctx, "%s out = {};\n", VERTEX_OUTPUT_TYPE);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ P_IND(ctx, "%s out = {};\n", FRAGMENT_OUTPUT_TYPE);
+
+ /* Load inputs to output */
+ u_foreach_bit64(location, shader->info.outputs_read) {
+ P_IND(ctx, "out.%s = in.%s;\n", FS_OUTPUT_NAME[location],
+ FS_OUTPUT_NAME[location]);
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+const char *
+msl_output_name(struct nir_to_msl_ctx *ctx, unsigned location)
+{
+ switch (ctx->shader->info.stage) {
+ case MESA_SHADER_VERTEX:
+ return VARYING_SLOT_NAME[location];
+ case MESA_SHADER_FRAGMENT:
+ return FS_OUTPUT_NAME[location];
+ default:
+ assert(0);
+ return "";
+ }
+}
+
+const char *
+msl_input_name(struct nir_to_msl_ctx *ctx, unsigned location)
+{
+ switch (ctx->shader->info.stage) {
+ case MESA_SHADER_FRAGMENT:
+ return VARYING_SLOT_NAME[location];
+ default:
+ assert(0);
+ return "";
+ }
+}
diff --git a/src/kosmickrisp/compiler/msl_nir_algebraic.py b/src/kosmickrisp/compiler/msl_nir_algebraic.py
new file mode 100644
index 00000000000..16d5fdaea9d
--- /dev/null
+++ b/src/kosmickrisp/compiler/msl_nir_algebraic.py
@@ -0,0 +1,38 @@
+# Copyright 2025 LunarG, Inc.
+# Copyright 2025 Google LLC
+# Copyright 2022 Alyssa Rosenzweig
+# Copyright 2021 Collabora, Ltd.
+# Copyright 2016 Intel Corporation
+# SPDX-License-Identifier: MIT
+
+import argparse
+import sys
+import math
+
+a = 'a'
+
+lower_pack = [
+ # Based on the VIR lowering
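+ # f2f16_rtz built from f2f16_rtne: if RTNE rounded away from zero
+ # (|a| < |rtne(a)| after converting back to f32), step the half's bit
+ # pattern down by one, which moves it one ULP toward zero for either sign.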
+ (('f2f16_rtz', 'a@32'),
+ ('bcsel', ('flt', ('fabs', a), ('fabs', ('f2f32', ('f2f16_rtne', a)))),
+ ('isub', ('f2f16_rtne', a), 1), ('f2f16_rtne', a))),
+]
+
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-p', '--import-path', required=True)
+ args = parser.parse_args()
+ sys.path.insert(0, args.import_path)
+ run()
+
+def run():
+ import nir_algebraic # pylint: disable=import-error
+
+ print('#include "msl_private.h"')
+
+ print(nir_algebraic.AlgebraicPass("msl_nir_lower_algebraic_late", lower_pack).render())
+
+if __name__ == '__main__':
+ main()
diff --git a/src/kosmickrisp/compiler/msl_nir_lower_common.c b/src/kosmickrisp/compiler/msl_nir_lower_common.c
new file mode 100644
index 00000000000..34e0f264bcb
--- /dev/null
+++ b/src/kosmickrisp/compiler/msl_nir_lower_common.c
@@ -0,0 +1,255 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+#include "nir_to_msl.h"
+
+#include "nir.h"
+#include "nir_builder.h"
+
+#include "util/format/u_format.h"
+
+bool
+msl_nir_vs_remove_point_size_write(nir_builder *b, nir_intrinsic_instr *intrin,
+ void *data)
+{
+ if (intrin->intrinsic != nir_intrinsic_store_output)
+ return false;
+
+ nir_io_semantics io = nir_intrinsic_io_semantics(intrin);
+ if (io.location == VARYING_SLOT_PSIZ) {
+ return nir_remove_sysval_output(intrin, MESA_SHADER_FRAGMENT);
+ }
+
+ return false;
+}
+
+bool
+msl_nir_fs_remove_depth_write(nir_builder *b, nir_intrinsic_instr *intrin,
+ void *data)
+{
+ if (intrin->intrinsic != nir_intrinsic_store_output)
+ return false;
+
+ nir_io_semantics io = nir_intrinsic_io_semantics(intrin);
+ if (io.location == FRAG_RESULT_DEPTH) {
+ return nir_remove_sysval_output(intrin, MESA_SHADER_FRAGMENT);
+ }
+
+ return false;
+}
+
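+/* Flip int <-> uint fragment output variables so their signedness matches the
+ * integer format of the render target they are bound to. */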
+bool
+msl_nir_fs_force_output_signedness(
+ nir_shader *nir, enum pipe_format render_target_formats[MAX_DRAW_BUFFERS])
+{
+ assert(nir->info.stage == MESA_SHADER_FRAGMENT);
+
+ bool update_derefs = false;
+ nir_foreach_variable_with_modes(var, nir, nir_var_shader_out) {
+ if (FRAG_RESULT_DATA0 <= var->data.location &&
+ var->data.location <= FRAG_RESULT_DATA7 &&
+ glsl_type_is_integer(var->type)) {
+ unsigned int slot = var->data.location - FRAG_RESULT_DATA0;
+
+ if (glsl_type_is_uint_16_32_64(var->type) &&
+ util_format_is_pure_sint(render_target_formats[slot])) {
+ var->type = glsl_ivec_type(var->type->vector_elements);
+ update_derefs = true;
+ } else if (glsl_type_is_int_16_32_64(var->type) &&
+ util_format_is_pure_uint(render_target_formats[slot])) {
+ var->type = glsl_uvec_type(var->type->vector_elements);
+ update_derefs = true;
+ }
+ }
+ }
+
+ if (update_derefs) {
+ nir_foreach_function_impl(impl, nir) {
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr(instr, block) {
+ switch (instr->type) {
+ case nir_instr_type_deref: {
+ nir_deref_instr *deref = nir_instr_as_deref(instr);
+ if (deref->deref_type == nir_deref_type_var) {
+ deref->type = deref->var->type;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ }
+ nir_progress(update_derefs, impl, nir_metadata_control_flow);
+ }
+ }
+
+ return update_derefs;
+}
+
+bool
+msl_lower_textures(nir_shader *nir)
+{
+ bool progress = false;
+ nir_lower_tex_options lower_tex_options = {
+ .lower_txp = ~0u,
+ .lower_sampler_lod_bias = true,
+
+ /* We don't use 1D textures because they are really limited in Metal */
+ .lower_1d = true,
+
+ /* Metal does not support tg4 with individual offsets for each sample */
+ .lower_tg4_offsets = true,
+
+ /* Metal does not natively support offsets for texture.read operations */
+ .lower_txf_offset = true,
+ .lower_txd_cube_map = true,
+ };
+
+ NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options);
+ return progress;
+}
+
+static bool
+replace_sample_id_for_sample_mask(nir_builder *b, nir_intrinsic_instr *intrin,
+ void *data)
+{
+ if (intrin->intrinsic != nir_intrinsic_load_sample_mask_in)
+ return false;
+
+ nir_def_replace(nir_instr_def(&intrin->instr), (nir_def *)data);
+ return true;
+}
+
+static bool
+msl_replace_load_sample_mask_in_for_static_sample_mask(
+ nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+ if (intr->intrinsic != nir_intrinsic_load_sample_mask_in)
+ return false;
+
+ nir_def *sample_mask = (nir_def *)data;
+ nir_def_rewrite_uses(&intr->def, sample_mask);
+ return true;
+}
+
+bool
+msl_lower_static_sample_mask(nir_shader *nir, uint32_t sample_mask)
+{
+ /* Only support fragment for now */
+ assert(nir->info.stage == MESA_SHADER_FRAGMENT);
+
+ /* Embed sample mask */
+ nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);
+ nir_builder b = nir_builder_at(nir_before_impl(entrypoint));
+
+ struct nir_io_semantics io_semantics = {
+ .location = FRAG_RESULT_SAMPLE_MASK,
+ .num_slots = 1u,
+ };
+ nir_def *sample_mask_def = nir_imm_int(&b, sample_mask);
+ nir_store_output(&b, sample_mask_def, nir_imm_int(&b, 0u), .base = 0u,
+ .range = 1u, .write_mask = 0x1, .component = 0u,
+ .src_type = nir_type_uint32, .io_semantics = io_semantics);
+
+ return nir_shader_intrinsics_pass(
+ nir, msl_replace_load_sample_mask_in_for_static_sample_mask,
+ nir_metadata_control_flow, sample_mask_def);
+}
+
+bool
+msl_ensure_depth_write(nir_shader *nir)
+{
+ assert(nir->info.stage == MESA_SHADER_FRAGMENT);
+
+ bool has_depth_write =
+ nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH);
+ if (!has_depth_write) {
+ nir_variable *depth_var = nir_create_variable_with_location(
+ nir, nir_var_shader_out, FRAG_RESULT_DEPTH, glsl_float_type());
+
+ /* Write to depth at the very beginning */
+ nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);
+ nir_builder b = nir_builder_at(nir_before_impl(entrypoint));
+
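+ /* Forward gl_FragCoord.z so the emitted depth matches what the
+ * fixed-function depth output would have been. */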
+ nir_deref_instr *depth_deref = nir_build_deref_var(&b, depth_var);
+ nir_def *position = nir_load_frag_coord(&b);
+ nir_store_deref(&b, depth_deref, nir_channel(&b, position, 2u),
+ 0xFFFFFFFF);
+
+ nir->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_DEPTH);
+ nir->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
+ return nir_progress(true, entrypoint, nir_metadata_control_flow);
+ }
+ return false;
+}
+
+bool
+msl_ensure_vertex_position_output(nir_shader *nir)
+{
+ assert(nir->info.stage == MESA_SHADER_VERTEX);
+
+ bool has_position_write =
+ nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_POS);
+ if (!has_position_write) {
+ nir_variable *position_var = nir_create_variable_with_location(
+ nir, nir_var_shader_out, VARYING_SLOT_POS, glsl_vec4_type());
+
+ /* Write to position at the very beginning */
+ nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);
+ nir_builder b = nir_builder_at(nir_before_impl(entrypoint));
+
+ nir_deref_instr *position_deref = nir_build_deref_var(&b, position_var);
+ nir_def *zero = nir_imm_float(&b, 0.0f);
+ nir_store_deref(&b, position_deref, nir_vec4(&b, zero, zero, zero, zero),
+ 0xFFFFFFFF);
+
+ nir->info.outputs_written |= BITFIELD64_BIT(VARYING_SLOT_POS);
+ return nir_progress(true, entrypoint, nir_metadata_control_flow);
+ }
+ return false;
+}
+
+static bool
+msl_sample_mask_uint(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+ if (intr->intrinsic == nir_intrinsic_store_output) {
+ struct nir_io_semantics io = nir_intrinsic_io_semantics(intr);
+ if (io.location == FRAG_RESULT_SAMPLE_MASK)
+ nir_intrinsic_set_src_type(intr, nir_type_uint32);
+ }
+
+ return false;
+}
+
+bool
+msl_nir_sample_mask_type(nir_shader *nir)
+{
+ assert(nir->info.stage == MESA_SHADER_FRAGMENT);
+ return nir_shader_intrinsics_pass(nir, msl_sample_mask_uint,
+ nir_metadata_all, NULL);
+}
+
+static bool
+msl_layer_id_uint(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+ if (intr->intrinsic == nir_intrinsic_store_output) {
+ struct nir_io_semantics io = nir_intrinsic_io_semantics(intr);
+ if (io.location == VARYING_SLOT_LAYER)
+ nir_intrinsic_set_src_type(intr, nir_type_uint32);
+ }
+
+ return false;
+}
+
+bool
+msl_nir_layer_id_type(nir_shader *nir)
+{
+ assert(nir->info.stage == MESA_SHADER_VERTEX);
+ return nir_shader_intrinsics_pass(nir, msl_layer_id_uint, nir_metadata_all,
+ NULL);
+}
diff --git a/src/kosmickrisp/compiler/msl_nir_lower_subgroups.c b/src/kosmickrisp/compiler/msl_nir_lower_subgroups.c
new file mode 100644
index 00000000000..30201145c4a
--- /dev/null
+++ b/src/kosmickrisp/compiler/msl_nir_lower_subgroups.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2023 Valve Corporation
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "msl_private.h"
+#include "nir.h"
+#include "nir_builder.h"
+
+static bool
+needs_bool_widening(nir_intrinsic_instr *intrin)
+{
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_read_invocation:
+ case nir_intrinsic_read_first_invocation:
+ case nir_intrinsic_reduce:
+ case nir_intrinsic_quad_broadcast:
+ case nir_intrinsic_quad_swap_horizontal:
+ case nir_intrinsic_quad_swap_vertical:
+ case nir_intrinsic_quad_swap_diagonal:
+ case nir_intrinsic_shuffle:
+ case nir_intrinsic_shuffle_down:
+ case nir_intrinsic_shuffle_up:
+ case nir_intrinsic_shuffle_xor:
+ return true;
+ default:
+ return false;
+ }
+}
+
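+/* 1-bit booleans can't be fed straight into these subgroup ops; widen them to
+ * 32-bit around the operation and narrow the result back to bool. */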
+static bool
+lower_bool_ops(nir_builder *b, nir_intrinsic_instr *intrin, void *_unused)
+{
+ if (!needs_bool_widening(intrin))
+ return false;
+
+ if (intrin->def.bit_size != 1)
+ return false;
+
+ b->cursor = nir_before_instr(&intrin->instr);
+ nir_def *widen = nir_b2i32(b, intrin->src[0].ssa);
+ nir_src_rewrite(&intrin->src[0], widen);
+ intrin->def.bit_size = 32;
+ b->cursor = nir_after_instr(&intrin->instr);
+ nir_def *narrow = nir_b2b1(b, &intrin->def);
+ nir_def_rewrite_uses_after(&intrin->def, narrow);
+
+ return true;
+}
+
+static bool
+lower(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+ b->cursor = nir_before_instr(&intr->instr);
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_vote_any: {
+ /* We don't have vote instructions, but we have efficient ballots */
+ nir_def *ballot = nir_ballot(b, 1, 32, intr->src[0].ssa);
+ nir_def_rewrite_uses(&intr->def, nir_ine_imm(b, ballot, 0));
+ return true;
+ }
+
+ case nir_intrinsic_vote_all: {
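+ /* all(x) holds iff no active invocation has !x, i.e. ballot(!x) == 0 */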
+ nir_def *ballot = nir_ballot(b, 1, 32, nir_inot(b, intr->src[0].ssa));
+ nir_def_rewrite_uses(&intr->def, nir_ieq_imm(b, ballot, 0));
+ return true;
+ }
+
+ default:
+ return false;
+ }
+}
+
+void
+msl_nir_lower_subgroups(nir_shader *nir)
+{
+ const nir_lower_subgroups_options subgroups_options = {
+ .subgroup_size = 32,
+ .ballot_bit_size = 32,
+ .ballot_components = 1,
+ .lower_subgroup_masks = true,
+ .lower_vote_ieq = true,
+ .lower_vote_feq = true,
+ .lower_vote_bool_eq = true,
+ .lower_inverse_ballot = true,
+ .lower_relative_shuffle = true,
+ .lower_quad = true,
+ .lower_reduce = true,
+ };
+ NIR_PASS(_, nir, nir_lower_subgroups, &subgroups_options);
+ NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower,
+ nir_metadata_control_flow, NULL);
+ NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower_bool_ops,
+ nir_metadata_control_flow, NULL);
+}
diff --git a/src/kosmickrisp/compiler/msl_private.h b/src/kosmickrisp/compiler/msl_private.h
new file mode 100644
index 00000000000..9ccd2bd7922
--- /dev/null
+++ b/src/kosmickrisp/compiler/msl_private.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#pragma once
+
+#include "util/string_buffer.h"
+#include "nir.h"
+
+struct io_slot_info {
+ nir_alu_type type;
+ uint32_t interpolation;
+ unsigned num_components;
+ bool centroid;
+ bool sample;
+};
+
+struct nir_to_msl_ctx {
+ FILE *output;
+ struct hash_table *types;
+ nir_shader *shader;
+ struct _mesa_string_buffer *text;
+ unsigned short indentlevel;
+ struct io_slot_info inputs_info[NUM_TOTAL_VARYING_SLOTS];
+ struct io_slot_info outputs_info[NUM_TOTAL_VARYING_SLOTS];
+};
+
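+/* Append printf-formatted MSL text to the context buffer; P_IND prepends the
+ * current indentation level, P appends as-is. */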
+#define P_IND(ctx, ...) \
+ do { \
+ for (unsigned i = 0; i < (ctx)->indentlevel; i++) \
+ _mesa_string_buffer_append((ctx)->text, " "); \
+ _mesa_string_buffer_printf((ctx)->text, __VA_ARGS__); \
+ } while (0);
+
+#define P(ctx, ...) _mesa_string_buffer_printf((ctx)->text, __VA_ARGS__);
+
+#define P_INDENT(ctx) \
+ do { \
+ for (unsigned i = 0; i < (ctx)->indentlevel; i++) \
+ _mesa_string_buffer_append((ctx)->text, " "); \
+ } while (0)
+
+/* Perform type inference. The returned value is a
+ * map from nir_def* to base type. */
+
+struct hash_table *msl_infer_types(nir_shader *shader);
+
+const char *msl_type_for_def(struct hash_table *types, nir_def *def);
+
+const char *msl_uint_type(uint8_t bit_size, uint8_t num_components);
+
+const char *msl_type_for_src(struct hash_table *types, nir_src *src);
+
+const char *msl_bitcast_for_src(struct hash_table *types, nir_src *src);
+
+void msl_src_as_const(struct nir_to_msl_ctx *ctx, nir_src *src);
+
+void msl_emit_io_blocks(struct nir_to_msl_ctx *ctx, nir_shader *shader);
+
+void msl_emit_output_var(struct nir_to_msl_ctx *ctx, nir_shader *shader);
+
+void msl_gather_io_info(struct nir_to_msl_ctx *ctx,
+ struct io_slot_info *info_array_input,
+ struct io_slot_info *info_array_output);
+
+const char *msl_input_name(struct nir_to_msl_ctx *ctx, unsigned location);
+
+const char *msl_output_name(struct nir_to_msl_ctx *ctx, unsigned location);
+
+bool msl_src_is_float(struct nir_to_msl_ctx *ctx, nir_src *src);
+bool msl_def_is_sampler(struct nir_to_msl_ctx *ctx, nir_def *def);
+
+void msl_nir_lower_subgroups(nir_shader *nir);
+
+bool msl_nir_lower_algebraic_late(nir_shader *shader);
diff --git a/src/kosmickrisp/compiler/msl_type_inference.c b/src/kosmickrisp/compiler/msl_type_inference.c
new file mode 100644
index 00000000000..c10d90c3481
--- /dev/null
+++ b/src/kosmickrisp/compiler/msl_type_inference.c
@@ -0,0 +1,857 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "vulkan/vulkan_core.h"
+#include "msl_private.h"
+
+typedef enum ti_type {
+ /* We haven't been able to assign a type yet */
+ TYPE_NONE = 0,
+ /* All we know is that this is used in I/O, we
+ * can treat it as an opaque value (i.e. uint) */
+ TYPE_GENERIC_DATA,
+ /* A generic int used in ALU operations that can also be a bool for bitwise ops */
+ TYPE_GENERIC_INT_OR_BOOL,
+ /* A generic int used in ALU operations that can be int or uint */
+ TYPE_GENERIC_INT,
+ /* These are actual concrete types. */
+ TYPE_INT,
+ TYPE_UINT,
+ TYPE_BOOL,
+ TYPE_FLOAT,
+ TYPE_SAMPLER,
+} ti_type;
+
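+/* The enum above is ordered from most generic to most specific, so MIN2/MAX2
+ * below pick the generic and the specific side. Returns the type to upgrade
+ * to, or TYPE_NONE when nothing needs to change or the types are
+ * incompatible. */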
+static ti_type
+unify_types(ti_type t1, ti_type t2)
+{
+ ti_type generic = MIN2(t1, t2);
+ ti_type specific = MAX2(t1, t2);
+ if (t1 == t2)
+ return TYPE_NONE;
+ // NONE or GENERIC_DATA can be upgraded into any concrete type
+ if (generic == TYPE_GENERIC_DATA || generic == TYPE_NONE)
+ return specific;
+ if ((generic == TYPE_GENERIC_INT_OR_BOOL) &&
+ ((specific == TYPE_INT) || (specific == TYPE_UINT) ||
+ (specific == TYPE_BOOL)))
+ return specific;
+ if ((generic == TYPE_GENERIC_INT) &&
+ ((specific == TYPE_INT) || (specific == TYPE_UINT)))
+ return specific;
+ return TYPE_NONE;
+}
+
+static ti_type
+ti_type_from_nir(nir_alu_type nir_type)
+{
+ switch (nir_alu_type_get_base_type(nir_type)) {
+ case nir_type_int:
+ return TYPE_INT;
+ case nir_type_uint:
+ return TYPE_UINT;
+ case nir_type_float:
+ return TYPE_FLOAT;
+ case nir_type_bool:
+ return TYPE_BOOL;
+ default:
+ assert(0);
+ return TYPE_NONE;
+ }
+}
+
+static ti_type
+ti_type_from_pipe_format(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_R16_FLOAT:
+ case PIPE_FORMAT_R32_FLOAT:
+ return TYPE_FLOAT;
+ case PIPE_FORMAT_R8_UINT:
+ case PIPE_FORMAT_R16_UINT:
+ case PIPE_FORMAT_R32_UINT:
+ case PIPE_FORMAT_R64_UINT:
+ return TYPE_UINT;
+ case PIPE_FORMAT_R8_SINT:
+ case PIPE_FORMAT_R16_SINT:
+ case PIPE_FORMAT_R32_SINT:
+ case PIPE_FORMAT_R64_SINT:
+ return TYPE_INT;
+ default:
+ assert(0);
+ return TYPE_NONE;
+ }
+}
+
+static void
+set_type(struct hash_table *types, void *key, ti_type type)
+{
+ _mesa_hash_table_insert(types, key, (void *)(uintptr_t)type);
+}
+
+static ti_type
+get_type(struct hash_table *types, void *key)
+{
+ struct hash_entry *entry = _mesa_hash_table_search(types, key);
+ if (!entry)
+ return TYPE_NONE;
+ return (ti_type)(intptr_t)(entry->data);
+}
+
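+/* Push a freshly resolved type through type-agnostic instructions (moves,
+ * vectors, bitwise ops, register and global accesses) so their sources and
+ * defs pick up the concrete type. Returns true if the instruction was
+ * handled. */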
+static bool
+update_instr_type(struct hash_table *types, nir_instr *instr, ti_type type)
+{
+ if (instr->type == nir_instr_type_alu) {
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ switch (alu->op) {
+ case nir_op_iadd:
+ case nir_op_isub:
+ case nir_op_ishl:
+ case nir_op_iand:
+ case nir_op_ior:
+ case nir_op_ixor:
+ set_type(types, &alu->def, type);
+ set_type(types, &alu->src[0].src, type);
+ set_type(types, &alu->src[1].src, type);
+ return true;
+ case nir_op_inot:
+ set_type(types, &alu->def, type);
+ set_type(types, &alu->src[0].src, type);
+ return true;
+ case nir_op_ieq:
+ case nir_op_ine:
+ set_type(types, &alu->src[0].src, type);
+ set_type(types, &alu->src[1].src, type);
+ return true;
+ case nir_op_bcsel:
+ set_type(types, &alu->def, type);
+ set_type(types, &alu->src[1].src, type);
+ set_type(types, &alu->src[2].src, type);
+ return true;
+ case nir_op_mov:
+ case nir_op_vec2:
+ case nir_op_vec3:
+ case nir_op_vec4:
+ set_type(types, &alu->def, type);
+ for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++)
+ set_type(types, &alu->src[i].src, type);
+ return true;
+ default:
+ return false;
+ }
+ } else if (instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ nir_intrinsic_info info = nir_intrinsic_infos[intr->intrinsic];
+ switch (intr->intrinsic) {
+ case nir_intrinsic_load_reg:
+ set_type(types, &intr->def, type);
+ set_type(types, &intr->src[0], type);
+ return true;
+ case nir_intrinsic_store_reg:
+ set_type(types, &intr->src[0], type);
+ set_type(types, &intr->src[1], type);
+ return true;
+ case nir_intrinsic_decl_reg:
+ set_type(types, &intr->def, type);
+ return true;
+ case nir_intrinsic_load_global:
+ case nir_intrinsic_load_global_constant:
+ case nir_intrinsic_load_global_constant_bounded:
+ case nir_intrinsic_load_global_constant_offset:
+ case nir_intrinsic_load_push_constant:
+ set_type(types, &intr->def, type);
+ return true;
+ /* Scratch and shared are always UINT */
+ case nir_intrinsic_load_scratch:
+ case nir_intrinsic_store_scratch:
+ case nir_intrinsic_load_shared:
+ case nir_intrinsic_store_shared:
+ return false;
+ case nir_intrinsic_store_global:
+ set_type(types, &intr->src[0], type);
+ return true;
+ case nir_intrinsic_read_first_invocation:
+ case nir_intrinsic_read_invocation:
+ case nir_intrinsic_quad_broadcast:
+ case nir_intrinsic_quad_swap_horizontal:
+ case nir_intrinsic_quad_swap_vertical:
+ case nir_intrinsic_quad_swap_diagonal:
+ case nir_intrinsic_shuffle:
+ case nir_intrinsic_shuffle_down:
+ case nir_intrinsic_shuffle_up:
+ case nir_intrinsic_shuffle_xor:
+ set_type(types, &intr->src[0], type);
+ set_type(types, &intr->def, type);
+ return true;
+ default:
+ if (info.has_dest && info.num_srcs == 0) {
+ set_type(types, &intr->def, type);
+ return true;
+ }
+ return false;
+ }
+ } else
+ return false;
+}
+
+static void
+infer_types_from_alu(struct hash_table *types, nir_alu_instr *alu)
+{
+ // For most ops we infer the type from nir_op_info, but some ALU
+ // instructions behave identically for int and uint. Their sources and
+ // defs are marked TYPE_GENERIC_INT (or TYPE_GENERIC_INT_OR_BOOL).
+ switch (alu->op) {
+ case nir_op_iadd:
+ case nir_op_isub:
+ case nir_op_ishl:
+ // (N, N) -> N
+ set_type(types, &alu->def, TYPE_GENERIC_INT);
+ set_type(types, &alu->src[0].src, TYPE_GENERIC_INT);
+ set_type(types, &alu->src[1].src, TYPE_GENERIC_INT);
+ break;
+ case nir_op_iand:
+ case nir_op_ior:
+ case nir_op_ixor:
+ set_type(types, &alu->def, TYPE_GENERIC_INT_OR_BOOL);
+ set_type(types, &alu->src[0].src, TYPE_GENERIC_INT_OR_BOOL);
+ set_type(types, &alu->src[1].src, TYPE_GENERIC_INT_OR_BOOL);
+ break;
+ case nir_op_inot:
+ // N -> N
+ set_type(types, &alu->def, TYPE_GENERIC_INT_OR_BOOL);
+ set_type(types, &alu->src[0].src, TYPE_GENERIC_INT_OR_BOOL);
+ break;
+ case nir_op_ieq:
+ case nir_op_ine:
+ // (N, N) -> bool
+ set_type(types, &alu->def, TYPE_BOOL);
+ set_type(types, &alu->src[0].src, TYPE_GENERIC_INT_OR_BOOL);
+ set_type(types, &alu->src[1].src, TYPE_GENERIC_INT_OR_BOOL);
+ break;
+ case nir_op_bcsel:
+ // (bool, T, T) -> T
+ set_type(types, &alu->def, TYPE_GENERIC_DATA);
+ set_type(types, &alu->src[0].src, TYPE_BOOL);
+ set_type(types, &alu->src[1].src, TYPE_GENERIC_DATA);
+ set_type(types, &alu->src[2].src, TYPE_GENERIC_DATA);
+ break;
+ // These don't provide any type information; we rely on type propagation
+ // to fill in the type data.
+ case nir_op_mov:
+ case nir_op_vec2:
+ case nir_op_vec3:
+ case nir_op_vec4:
+ break;
+ /* We don't have 32-bit width boolean, those are uints. */
+ case nir_op_b2b32:
+ set_type(types, &alu->def, TYPE_UINT);
+ set_type(types, &alu->src[0].src, TYPE_UINT);
+ break;
+
+ default: {
+ // set type for def
+ const nir_op_info *info = &nir_op_infos[alu->op];
+ set_type(types, &alu->def, ti_type_from_nir(info->output_type));
+ for (int i = 0; i < info->num_inputs; i++) {
+ // set type for src
+ set_type(types, &alu->src[i].src,
+ ti_type_from_nir(info->input_types[i]));
+ }
+ }
+ }
+}
+
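+/* Seed types for an intrinsic's def and sources: addresses and offsets are
+ * uints, data whose type depends on the consumer is TYPE_GENERIC_DATA, and
+ * typed intrinsics (I/O, atomics, image ops) take their type from the NIR
+ * dest/src type, atomic op or format. */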
+static void
+infer_types_from_intrinsic(struct hash_table *types, nir_intrinsic_instr *instr)
+{
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_interpolated_input:
+ case nir_intrinsic_load_output: {
+ ti_type ty = ti_type_from_nir(nir_intrinsic_dest_type(instr));
+ set_type(types, &instr->def, ty);
+ break;
+ }
+ case nir_intrinsic_load_global_constant:
+ set_type(types, &instr->def, TYPE_GENERIC_DATA);
+ set_type(types, &instr->src[0], TYPE_UINT);
+ break;
+ case nir_intrinsic_load_global_constant_bounded:
+ set_type(types, &instr->def, TYPE_GENERIC_DATA);
+ set_type(types, &instr->src[0], TYPE_UINT);
+ set_type(types, &instr->src[1], TYPE_UINT);
+ set_type(types, &instr->src[2], TYPE_UINT);
+ break;
+ case nir_intrinsic_load_global_constant_offset:
+ set_type(types, &instr->def, TYPE_GENERIC_DATA);
+ set_type(types, &instr->src[0], TYPE_UINT);
+ set_type(types, &instr->src[1], TYPE_UINT);
+ break;
+ case nir_intrinsic_load_global:
+ case nir_intrinsic_load_push_constant:
+ set_type(types, &instr->def, TYPE_GENERIC_DATA);
+ set_type(types, &instr->src[0], TYPE_UINT);
+ break;
+
+ case nir_intrinsic_global_atomic:
+ case nir_intrinsic_global_atomic_swap:
+ case nir_intrinsic_shared_atomic:
+ case nir_intrinsic_shared_atomic_swap: {
+ ti_type type =
+ ti_type_from_nir(nir_atomic_op_type(nir_intrinsic_atomic_op(instr)));
+ set_type(types, &instr->def, type);
+ set_type(types, &instr->src[0], TYPE_UINT);
+ set_type(types, &instr->src[1], type);
+ set_type(types, &instr->src[2], type);
+ break;
+ }
+ case nir_intrinsic_store_global:
+ set_type(types, &instr->src[0], TYPE_GENERIC_DATA);
+ set_type(types, &instr->src[1], TYPE_UINT);
+ break;
+ case nir_intrinsic_store_output: {
+ ti_type ty = ti_type_from_nir(nir_intrinsic_src_type(instr));
+ set_type(types, &instr->src[0], ty);
+ break;
+ }
+ case nir_intrinsic_decl_reg:
+ if (nir_intrinsic_bit_size(instr) == 1)
+ set_type(types, &instr->def, TYPE_BOOL);
+ else
+ set_type(types, &instr->def, TYPE_NONE);
+ break;
+ case nir_intrinsic_store_reg:
+ set_type(types, &instr->src[0], TYPE_NONE);
+ set_type(types, &instr->src[1], TYPE_NONE);
+ break;
+ case nir_intrinsic_load_reg:
+ set_type(types, &instr->src[0], TYPE_NONE);
+ set_type(types, &instr->def, TYPE_NONE);
+ break;
+ case nir_intrinsic_load_scratch:
+ case nir_intrinsic_load_shared:
+ set_type(types, &instr->def, TYPE_UINT);
+ set_type(types, &instr->src[0], TYPE_UINT);
+ break;
+ case nir_intrinsic_store_scratch:
+ case nir_intrinsic_store_shared:
+ set_type(types, &instr->src[0], TYPE_UINT);
+ set_type(types, &instr->src[1], TYPE_UINT);
+ break;
+ case nir_intrinsic_load_workgroup_id:
+ case nir_intrinsic_load_subgroup_id:
+ case nir_intrinsic_load_local_invocation_id:
+ case nir_intrinsic_load_global_invocation_id:
+ case nir_intrinsic_load_num_workgroups:
+ case nir_intrinsic_load_num_subgroups:
+ case nir_intrinsic_load_subgroup_size:
+ case nir_intrinsic_load_sample_id:
+ case nir_intrinsic_load_sample_mask:
+ case nir_intrinsic_load_subgroup_invocation:
+ case nir_intrinsic_load_amplification_id_kk:
+ set_type(types, &instr->def, TYPE_UINT);
+ break;
+ case nir_intrinsic_load_vulkan_descriptor:
+ set_type(types, &instr->src[0], TYPE_UINT);
+ set_type(types, &instr->def, TYPE_UINT);
+ break;
+ case nir_intrinsic_load_buffer_ptr_kk:
+ set_type(types, &instr->def, TYPE_UINT);
+ break;
+ // The defs of these instructions don't participate in type inference
+ // but their sources are pointers (i.e. uints).
+ case nir_intrinsic_load_texture_handle_kk:
+ case nir_intrinsic_load_depth_texture_kk:
+ set_type(types, &instr->src[0], TYPE_UINT);
+ break;
+ case nir_intrinsic_load_sampler_handle_kk:
+ set_type(types, &instr->def, TYPE_SAMPLER);
+ break;
+ case nir_intrinsic_ddx:
+ case nir_intrinsic_ddy:
+ case nir_intrinsic_ddx_coarse:
+ case nir_intrinsic_ddy_coarse:
+ case nir_intrinsic_ddx_fine:
+ case nir_intrinsic_ddy_fine:
+ set_type(types, &instr->src[0], TYPE_FLOAT);
+ set_type(types, &instr->def, TYPE_FLOAT);
+ break;
+ case nir_intrinsic_load_point_coord:
+ set_type(types, &instr->def, TYPE_FLOAT);
+ break;
+ case nir_intrinsic_load_front_face:
+ case nir_intrinsic_elect:
+ case nir_intrinsic_load_helper_invocation:
+ case nir_intrinsic_is_helper_invocation:
+ set_type(types, &instr->def, TYPE_BOOL);
+ break;
+ case nir_intrinsic_load_constant_agx:
+ set_type(types, &instr->src[0], TYPE_UINT);
+ set_type(types, &instr->src[1], TYPE_UINT);
+ set_type(types, &instr->def,
+ ti_type_from_pipe_format(nir_intrinsic_format(instr)));
+ break;
+ case nir_intrinsic_bindless_image_load:
+ set_type(types, &instr->def,
+ ti_type_from_nir(nir_intrinsic_dest_type(instr)));
+ set_type(types, &instr->src[1], TYPE_UINT); // coords
+ set_type(types, &instr->src[3], TYPE_UINT); // level
+ break;
+ case nir_intrinsic_bindless_image_store:
+ set_type(types, &instr->src[1], TYPE_UINT); // coords
+ set_type(types, &instr->src[3],
+ ti_type_from_nir(nir_intrinsic_src_type(instr)));
+ set_type(types, &instr->src[4], TYPE_UINT); // level
+ break;
+ case nir_intrinsic_demote_if:
+ case nir_intrinsic_terminate_if:
+ set_type(types, &instr->src[0], TYPE_BOOL);
+ break;
+ case nir_intrinsic_bindless_image_atomic:
+ case nir_intrinsic_bindless_image_atomic_swap: {
+ set_type(types, &instr->src[1], TYPE_UINT); // coords
+ set_type(types, &instr->src[2], TYPE_UINT); // level
+ ti_type type =
+ ti_type_from_nir(nir_atomic_op_type(nir_intrinsic_atomic_op(instr)));
+ set_type(types, &instr->src[3], type);
+ if (instr->intrinsic == nir_intrinsic_bindless_image_atomic_swap)
+ set_type(types, &instr->src[4], type);
+ set_type(types, &instr->def, type);
+ break;
+ }
+ case nir_intrinsic_ballot:
+ set_type(types, &instr->src[0], TYPE_BOOL);
+ set_type(types, &instr->def, TYPE_UINT);
+ break;
+ case nir_intrinsic_vote_all:
+ case nir_intrinsic_vote_any:
+ set_type(types, &instr->src[0], TYPE_BOOL);
+ set_type(types, &instr->def, TYPE_BOOL);
+ break;
+ case nir_intrinsic_read_first_invocation:
+ case nir_intrinsic_quad_swap_horizontal:
+ case nir_intrinsic_quad_swap_vertical:
+ case nir_intrinsic_quad_swap_diagonal:
+ set_type(types, &instr->src[0], TYPE_GENERIC_DATA);
+ set_type(types, &instr->def, TYPE_GENERIC_DATA);
+ break;
+ case nir_intrinsic_read_invocation:
+ case nir_intrinsic_quad_broadcast:
+ case nir_intrinsic_shuffle:
+ case nir_intrinsic_shuffle_down:
+ case nir_intrinsic_shuffle_up:
+ case nir_intrinsic_shuffle_xor:
+ set_type(types, &instr->src[0], TYPE_GENERIC_DATA);
+ set_type(types, &instr->def, TYPE_GENERIC_DATA);
+ set_type(types, &instr->src[1], TYPE_UINT);
+ break;
+ case nir_intrinsic_reduce:
+ switch (nir_intrinsic_reduction_op(instr)) {
+ case nir_op_iand:
+ case nir_op_ior:
+ case nir_op_ixor:
+ case nir_op_iadd:
+ case nir_op_imul:
+ set_type(types, &instr->src[0], TYPE_GENERIC_INT);
+ set_type(types, &instr->def, TYPE_GENERIC_INT);
+ break;
+ case nir_op_imax:
+ case nir_op_imin:
+ set_type(types, &instr->src[0], TYPE_INT);
+ set_type(types, &instr->def, TYPE_INT);
+ break;
+ case nir_op_umax:
+ case nir_op_umin:
+ set_type(types, &instr->src[0], TYPE_UINT);
+ set_type(types, &instr->def, TYPE_UINT);
+ break;
+ case nir_op_fadd:
+ case nir_op_fmax:
+ case nir_op_fmin:
+ case nir_op_fmul:
+ set_type(types, &instr->src[0], TYPE_FLOAT);
+ set_type(types, &instr->def, TYPE_FLOAT);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+}
+
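+/* Texture sources have fixed types: coordinates are float except for
+ * txf/txf_ms fetches (uint), LODs are float except for fetches and txs
+ * (uint), and offsets are signed ints. */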
+static void
+infer_types_from_tex(struct hash_table *types, nir_tex_instr *tex)
+{
+ set_type(types, &tex->def, ti_type_from_nir(tex->dest_type));
+ for (int i = 0; i < tex->num_srcs; i++) {
+ nir_src *src = &tex->src[i].src;
+ switch (tex->src[i].src_type) {
+ case nir_tex_src_coord:
+ if (tex->op == nir_texop_txf || tex->op == nir_texop_txf_ms)
+ set_type(types, src, TYPE_UINT);
+ else
+ set_type(types, src, TYPE_FLOAT);
+ break;
+ case nir_tex_src_comparator:
+ set_type(types, src, TYPE_FLOAT);
+ break;
+ case nir_tex_src_offset:
+ set_type(types, src, TYPE_INT);
+ break;
+ case nir_tex_src_bias:
+ set_type(types, src, TYPE_FLOAT);
+ break;
+ case nir_tex_src_lod:
+ if (tex->op == nir_texop_txf || tex->op == nir_texop_txf_ms ||
+ tex->op == nir_texop_txs)
+ set_type(types, src, TYPE_UINT);
+ else
+ set_type(types, src, TYPE_FLOAT);
+ break;
+ case nir_tex_src_min_lod:
+ set_type(types, src, TYPE_FLOAT);
+ break;
+ case nir_tex_src_ms_index:
+ set_type(types, src, TYPE_UINT);
+ break;
+ case nir_tex_src_ddx:
+ case nir_tex_src_ddy:
+ set_type(types, src, TYPE_FLOAT);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void
+infer_types_from_instr(struct hash_table *types, nir_instr *instr)
+{
+ switch (instr->type) {
+ case nir_instr_type_alu:
+ infer_types_from_alu(types, nir_instr_as_alu(instr));
+ return;
+ case nir_instr_type_intrinsic:
+ infer_types_from_intrinsic(types, nir_instr_as_intrinsic(instr));
+ return;
+ case nir_instr_type_tex:
+ infer_types_from_tex(types, nir_instr_as_tex(instr));
+ break;
+ default:
+ break;
+ }
+}
+
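+/* Walk an instruction's sources and unify the type expected at each use with
+ * the type of the producing def. Whichever side is less specific gets
+ * upgraded via update_instr_type(); returns true if anything changed so the
+ * caller can iterate to a fixed point. */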
+static bool
+propagate_types(struct hash_table *types, nir_instr *instr)
+{
+ bool progress = false;
+ switch (instr->type) {
+ case nir_instr_type_alu: {
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ nir_op_info info = nir_op_infos[alu->op];
+ for (int i = 0; i < info.num_inputs; i++) {
+ ti_type src_type = get_type(types, &alu->src[i].src);
+ ti_type def_type = get_type(types, alu->src[i].src.ssa);
+ ti_type unified_type = unify_types(src_type, def_type);
+ nir_instr *parent_instr = alu->src[i].src.ssa->parent_instr;
+ if (unified_type > src_type) {
+ progress |= update_instr_type(types, instr, unified_type);
+ } else if (unified_type > def_type) {
+ progress |= update_instr_type(types, parent_instr, unified_type);
+ }
+ }
+ break;
+ }
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ nir_intrinsic_info info = nir_intrinsic_infos[intr->intrinsic];
+ for (int i = 0; i < info.num_srcs; i++) {
+ ti_type src_type = get_type(types, &intr->src[i]);
+ ti_type def_type = get_type(types, intr->src[i].ssa);
+ ti_type unified_type = unify_types(src_type, def_type);
+ nir_instr *parent_instr = intr->src[i].ssa->parent_instr;
+ if (unified_type > src_type) {
+ progress |= update_instr_type(types, instr, unified_type);
+ } else if (unified_type > def_type) {
+ progress |= update_instr_type(types, parent_instr, unified_type);
+ }
+ }
+ break;
+ }
+ case nir_instr_type_tex: {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ for (int i = 0; i < tex->num_srcs; i++) {
+ ti_type src_type = get_type(types, &tex->src[i].src);
+ ti_type def_type = get_type(types, tex->src[i].src.ssa);
+ ti_type unified_type = unify_types(src_type, def_type);
+ if (src_type == TYPE_NONE)
+ continue;
+ nir_instr *parent_instr = tex->src[i].src.ssa->parent_instr;
+ if (unified_type > def_type) {
+ progress |= update_instr_type(types, parent_instr, unified_type);
+ }
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ return progress;
+}
+
+static const char *float_names[] = {"float", "float2", "float3", "float4"};
+static const char *half_names[] = {"half", "half2", "half3", "half4"};
+static const char *bool_names[] = {"bool", "bool2", "bool3", "bool4"};
+static const char *int8_names[] = {"char", "char2", "char3", "char4"};
+static const char *uint8_names[] = {"uchar", "uchar2", "uchar3", "uchar4"};
+static const char *int16_names[] = {"short", "short2", "short3", "short4"};
+static const char *uint16_names[] = {"ushort", "ushort2", "ushort3", "ushort4"};
+static const char *int32_names[] = {"int", "int2", "int3", "int4"};
+static const char *uint32_names[] = {"uint", "uint2", "uint3", "uint4"};
+static const char *int64_names[] = {"long", "long2", "long3", "long4"};
+static const char *uint64_names[] = {"ulong", "ulong2", "ulong3", "ulong4"};
+
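+/* Map an inferred ti_type plus bit width and component count to the MSL
+ * scalar or vector type name, e.g. (TYPE_FLOAT, 32, 3) -> "float3". The
+ * generic integer types fall back to the unsigned names, and 1-bit values
+ * map to the bool names. */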
+static const char *
+ti_type_to_msl_type(ti_type type, uint8_t bit_width, uint8_t num_components)
+{
+ switch (type) {
+ case TYPE_GENERIC_DATA:
+ case TYPE_GENERIC_INT:
+ case TYPE_GENERIC_INT_OR_BOOL:
+ case TYPE_UINT:
+ switch (bit_width) {
+ case 1:
+ return bool_names[num_components - 1];
+ case 8:
+ return uint8_names[num_components - 1];
+ case 16:
+ return uint16_names[num_components - 1];
+ case 32:
+ return uint32_names[num_components - 1];
+ case 64:
+ return uint64_names[num_components - 1];
+ default:
+ assert(!"Bad uint length");
+ }
+ break;
+ case TYPE_BOOL:
+ return bool_names[num_components - 1];
+ case TYPE_INT:
+ switch (bit_width) {
+ case 8:
+ return int8_names[num_components - 1];
+ case 16:
+ return int16_names[num_components - 1];
+ case 32:
+ return int32_names[num_components - 1];
+ case 64:
+ return int64_names[num_components - 1];
+ default:
+ assert(!"Bad uint length");
+ }
+ break;
+ case TYPE_FLOAT:
+ switch (bit_width) {
+ case 16:
+ return half_names[num_components - 1];
+ case 32:
+ return float_names[num_components - 1];
+ default:
+ assert(!"Bad float length");
+ }
+ break;
+ case TYPE_SAMPLER:
+ return "sampler";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+const char *
+msl_uint_type(uint8_t bit_size, uint8_t num_components)
+{
+ return ti_type_to_msl_type(TYPE_UINT, bit_size, num_components);
+}
+
+const char *
+msl_type_for_def(struct hash_table *types, nir_def *def)
+{
+ ti_type type = get_type(types, def);
+ return ti_type_to_msl_type(type, def->bit_size, def->num_components);
+}
+
+const char *
+msl_type_for_src(struct hash_table *types, nir_src *src)
+{
+ ti_type type = get_type(types, src);
+ // This won't necessarily work for ALU srcs, but for intrinsics it's fine.
+ return ti_type_to_msl_type(type, src->ssa->bit_size,
+ src->ssa->num_components);
+}
+
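+/* Return the MSL type to as_type-cast a source into when the type expected
+ * at the use differs from the type of the producing def, or NULL when no
+ * bitcast is needed (same type, bool involved, or an if-condition use). */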
+const char *
+msl_bitcast_for_src(struct hash_table *types, nir_src *src)
+{
+ ti_type src_type = get_type(types, src);
+ ti_type def_type = get_type(types, src->ssa);
+ if (nir_src_is_if(src))
+ return NULL;
+ if (src_type != def_type) {
+ /* bool types cannot use as_type casting */
+ if (src_type == TYPE_BOOL || def_type == TYPE_BOOL)
+ return NULL;
+
+ // produce bitcast _into_ src_type
+ return ti_type_to_msl_type(src_type, src->ssa->bit_size,
+ src->ssa->num_components);
+ } else {
+ return NULL;
+ }
+}
+
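+/* Print one component of a constant source, wrapped in an explicit
+ * constructor of the inferred type so MSL overload resolution stays
+ * unambiguous. */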
+static void
+emit_src_component(struct nir_to_msl_ctx *ctx, nir_src *src, unsigned comp)
+{
+ ti_type type = get_type(ctx->types, src);
+ switch (type) {
+ case TYPE_FLOAT: {
+ double v = nir_src_comp_as_float(*src, comp);
+ if (isinf(v)) {
+ P(ctx, "(INFINITY");
+ } else if (isnan(v)) {
+ P(ctx, "(NAN");
+ } else {
+ /* Build the type explicitly: the MSL compiler otherwise rejects
+ * expressions like "max(as_type<int>(t53), -2147483648)" as ambiguous,
+ * even though both operands are ints and no long overload applies.
+ * From CTS test:
+ * dEQP-VK.renderpass.suballocation.multisample.r32_sint.samples_2 */
+ if (src->ssa->bit_size == 16) {
+ P(ctx, "half(");
+ } else {
+ P(ctx, "float(");
+ }
+ P(ctx, "%.*le", DBL_DECIMAL_DIG, nir_src_comp_as_float(*src, comp));
+ }
+ break;
+ }
+ case TYPE_BOOL:
+ P(ctx, "bool(%d", nir_src_comp_as_bool(*src, comp));
+ break;
+ case TYPE_INT:
+ switch (src->ssa->bit_size) {
+ case 8:
+ P(ctx, "char(");
+ break;
+ case 16:
+ P(ctx, "short(");
+ break;
+ case 32:
+ P(ctx, "int(");
+ break;
+ case 64:
+ P(ctx, "long(");
+ break;
+ default:
+ UNREACHABLE("Incorrect bit_size for TYPE_INT");
+ }
+ P(ctx, "%" PRId64, nir_src_comp_as_int(*src, comp));
+ break;
+ case TYPE_UINT:
+ case TYPE_GENERIC_DATA:
+ case TYPE_GENERIC_INT:
+ case TYPE_GENERIC_INT_OR_BOOL:
+ switch (src->ssa->bit_size) {
+ case 8:
+ P(ctx, "uchar(");
+ break;
+ case 16:
+ P(ctx, "ushort(");
+ break;
+ case 32:
+ P(ctx, "uint(");
+ break;
+ case 64:
+ P(ctx, "ulong(");
+ break;
+ default:
+ UNREACHABLE("Incorrect bit_size for TYPE_UINT");
+ }
+ P(ctx, "%" PRIu64 "u", nir_src_comp_as_uint(*src, comp));
+ break;
+ case TYPE_NONE:
+ assert(0);
+ P(ctx, "UNTYPED!");
+ break;
+ default:
+ return;
+ }
+ P(ctx, ")");
+}
+
+void
+msl_src_as_const(struct nir_to_msl_ctx *ctx, nir_src *src)
+{
+ ti_type type = get_type(ctx->types, src);
+ if (src->ssa->num_components == 1) {
+ emit_src_component(ctx, src, 0);
+ } else {
+ P(ctx, "%s(",
+ ti_type_to_msl_type(type, src->ssa->bit_size,
+ src->ssa->num_components));
+ for (int i = 0; i < src->ssa->num_components; i++) {
+ if (i)
+ P(ctx, ", ");
+ emit_src_component(ctx, src, i);
+ }
+ P(ctx, ")");
+ }
+}
+
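+/* Build the type table for a shader: seed every def and source with the
+ * types implied by its own instruction, then propagate across def/use edges
+ * until no type can be made more specific. */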
+struct hash_table *
+msl_infer_types(nir_shader *shader)
+{
+ struct hash_table *types = _mesa_pointer_hash_table_create(NULL);
+ bool progress = false;
+ // First, seed the types for every instruction for every source and def
+ nir_foreach_function_impl(impl, shader) {
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr(instr, block) {
+ infer_types_from_instr(types, instr);
+ }
+ }
+ }
+
+ do {
+ progress = false;
+ nir_foreach_function_impl(impl, shader) {
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr(instr, block) {
+ progress |= propagate_types(types, instr);
+ }
+ }
+ }
+ } while (progress);
+ return types;
+}
+
+bool
+msl_src_is_float(struct nir_to_msl_ctx *ctx, nir_src *src)
+{
+ return get_type(ctx->types, src) == TYPE_FLOAT;
+}
+
+bool
+msl_def_is_sampler(struct nir_to_msl_ctx *ctx, nir_def *def)
+{
+ return get_type(ctx->types, def) == TYPE_SAMPLER;
+}
diff --git a/src/kosmickrisp/compiler/nir_to_msl.c b/src/kosmickrisp/compiler/nir_to_msl.c
new file mode 100644
index 00000000000..51b96bb2c62
--- /dev/null
+++ b/src/kosmickrisp/compiler/nir_to_msl.c
@@ -0,0 +1,2051 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "nir_to_msl.h"
+#include "msl_private.h"
+#include "nir.h"
+
+static const char *
+get_stage_string(mesa_shader_stage stage)
+{
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ return "vertex";
+ case MESA_SHADER_FRAGMENT:
+ return "fragment";
+ case MESA_SHADER_COMPUTE:
+ return "kernel";
+ default:
+ assert(0);
+ return "";
+ }
+}
+
+static const char *
+get_entrypoint_name(nir_shader *shader)
+{
+ return nir_shader_get_entrypoint(shader)->function->name;
+}
+
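+/* Map NIR system values to the corresponding MSL entry-point argument
+ * declarations. Empty strings mark values that are produced by function
+ * calls in the shader body instead of by an input attribute. */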
+static const char *sysval_table[SYSTEM_VALUE_MAX] = {
+ [SYSTEM_VALUE_SUBGROUP_SIZE] =
+ "uint gl_SubGroupSize [[threads_per_simdgroup]]",
+ [SYSTEM_VALUE_SUBGROUP_INVOCATION] =
+ "uint gl_SubGroupInvocation [[thread_index_in_simdgroup]]",
+ [SYSTEM_VALUE_NUM_SUBGROUPS] =
+ "uint gl_NumSubGroups [[simdgroups_per_threadgroup]]",
+ [SYSTEM_VALUE_SUBGROUP_ID] =
+ "uint gl_SubGroupID [[simdgroup_index_in_threadgroup]]",
+ [SYSTEM_VALUE_WORKGROUP_ID] =
+ "uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]",
+ [SYSTEM_VALUE_LOCAL_INVOCATION_ID] =
+ "uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]",
+ [SYSTEM_VALUE_GLOBAL_INVOCATION_ID] =
+ "uint3 gl_GlobalInvocationID [[thread_position_in_grid]]",
+ [SYSTEM_VALUE_NUM_WORKGROUPS] =
+ "uint3 gl_NumWorkGroups [[threadgroups_per_grid]]",
+ [SYSTEM_VALUE_LOCAL_INVOCATION_INDEX] =
+ "uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]",
+ [SYSTEM_VALUE_VERTEX_ID] = "uint gl_VertexID [[vertex_id]]",
+ [SYSTEM_VALUE_INSTANCE_ID] = "uint gl_InstanceID [[instance_id]]",
+ [SYSTEM_VALUE_BASE_INSTANCE] = "uint gl_BaseInstance [[base_instance]]",
+ [SYSTEM_VALUE_FRAG_COORD] = "float4 gl_FragCoord [[position]]",
+ [SYSTEM_VALUE_POINT_COORD] = "float2 gl_PointCoord [[point_coord]]",
+ [SYSTEM_VALUE_FRONT_FACE] = "bool gl_FrontFacing [[front_facing]]",
+ [SYSTEM_VALUE_LAYER_ID] = "uint gl_Layer [[render_target_array_index]]",
+ [SYSTEM_VALUE_SAMPLE_ID] = "uint gl_SampleID [[sample_id]]",
+ [SYSTEM_VALUE_SAMPLE_MASK_IN] = "uint gl_SampleMask [[sample_mask]]",
+ [SYSTEM_VALUE_AMPLIFICATION_ID_KK] =
+ "uint mtl_AmplificationID [[amplification_id]]",
+ /* These are functions and not shader input variables */
+ [SYSTEM_VALUE_HELPER_INVOCATION] = "",
+};
+
+static void
+emit_sysvals(struct nir_to_msl_ctx *ctx, nir_shader *shader)
+{
+ unsigned i;
+ BITSET_FOREACH_SET(i, shader->info.system_values_read, SYSTEM_VALUE_MAX) {
+ assert(sysval_table[i]);
+ if (sysval_table[i] && sysval_table[i][0])
+ P_IND(ctx, "%s,\n", sysval_table[i]);
+ }
+}
+
+static void
+emit_inputs(struct nir_to_msl_ctx *ctx, nir_shader *shader)
+{
+ switch (shader->info.stage) {
+ case MESA_SHADER_FRAGMENT:
+ P_IND(ctx, "FragmentIn in [[stage_in]],\n");
+ break;
+ default:
+ break;
+ }
+ P_IND(ctx, "constant Buffer &buf0 [[buffer(0)]],\n");
+ P_IND(ctx, "constant SamplerTable &sampler_table [[buffer(1)]]\n");
+}
+
+static const char *
+output_type(nir_shader *shader)
+{
+ switch (shader->info.stage) {
+ case MESA_SHADER_VERTEX:
+ return "VertexOut";
+ case MESA_SHADER_FRAGMENT:
+ return "FragmentOut";
+ default:
+ return "void";
+ }
+}
+
+static void
+emit_local_vars(struct nir_to_msl_ctx *ctx, nir_shader *shader)
+{
+ if (shader->info.shared_size) {
+ P_IND(ctx, "threadgroup char shared_data[%d];\n",
+ shader->info.shared_size);
+ }
+ if (shader->scratch_size) {
+ P_IND(ctx, "uchar scratch[%d] = {0};\n", shader->scratch_size);
+ }
+ if (BITSET_TEST(shader->info.system_values_read,
+ SYSTEM_VALUE_HELPER_INVOCATION)) {
+ P_IND(ctx, "bool gl_HelperInvocation = simd_is_helper_thread();\n");
+ }
+}
+
+static bool
+is_register(nir_def *def)
+{
+ return ((def->parent_instr->type == nir_instr_type_intrinsic) &&
+ (nir_instr_as_intrinsic(def->parent_instr)->intrinsic ==
+ nir_intrinsic_load_reg));
+}
+
+static void
+writemask_to_msl(struct nir_to_msl_ctx *ctx, unsigned write_mask,
+ unsigned num_components)
+{
+ if (num_components != util_bitcount(write_mask)) {
+ P(ctx, ".");
+ for (unsigned i = 0; i < num_components; i++)
+ if ((write_mask >> i) & 1)
+ P(ctx, "%c", "xyzw"[i]);
+ }
+}
+
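+/* Print a source: constants and undefs are materialized inline, register
+ * loads are folded into their use, and an as_type bitcast is wrapped around
+ * the value when the use expects a different type than the producing def. */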
+static void
+src_to_msl(struct nir_to_msl_ctx *ctx, nir_src *src)
+{
+ /* Pointer types cannot use as_type casting */
+ const char *bitcast = msl_bitcast_for_src(ctx->types, src);
+ if (nir_src_is_const(*src)) {
+ msl_src_as_const(ctx, src);
+ return;
+ }
+ if (nir_src_is_undef(*src)) {
+ if (src->ssa->num_components == 1) {
+ P(ctx, "00");
+ } else {
+ P(ctx, "%s(", msl_type_for_src(ctx->types, src));
+ for (int i = 0; i < src->ssa->num_components; i++) {
+ if (i)
+ P(ctx, ", ");
+ P(ctx, "00");
+ }
+ P(ctx, ")");
+ }
+ return;
+ }
+
+ if (bitcast)
+ P(ctx, "as_type<%s>(", bitcast);
+ if (is_register(src->ssa)) {
+ nir_intrinsic_instr *instr =
+ nir_instr_as_intrinsic(src->ssa->parent_instr);
+ if (src->ssa->bit_size != 1u) {
+ P(ctx, "as_type<%s>(r%d)", msl_type_for_def(ctx->types, src->ssa),
+ instr->src[0].ssa->index);
+ } else {
+ P(ctx, "%s(r%d)", msl_type_for_def(ctx->types, src->ssa),
+ instr->src[0].ssa->index);
+ }
+ } else if (nir_src_is_const(*src)) {
+ msl_src_as_const(ctx, src);
+ } else {
+ P(ctx, "t%d", src->ssa->index);
+ }
+ if (bitcast)
+ P(ctx, ")");
+}
+
+static void
+alu_src_to_msl(struct nir_to_msl_ctx *ctx, nir_alu_instr *instr, int srcn)
+{
+ nir_alu_src *src = &instr->src[srcn];
+ src_to_msl(ctx, &src->src);
+ if (!nir_alu_src_is_trivial_ssa(instr, srcn) &&
+ src->src.ssa->num_components > 1) {
+ int num_components = nir_src_num_components(src->src);
+ assert(num_components <= 4);
+
+ P(ctx, ".");
+ for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
+ if (!nir_alu_instr_channel_used(instr, srcn, i))
+ continue;
+ P(ctx, "%c", "xyzw"[src->swizzle[i]]);
+ }
+ }
+}
+
+static void
+alu_funclike(struct nir_to_msl_ctx *ctx, nir_alu_instr *instr, const char *name)
+{
+ const nir_op_info *info = &nir_op_infos[instr->op];
+ P(ctx, "%s(", name);
+ for (int i = 0; i < info->num_inputs; i++) {
+ alu_src_to_msl(ctx, instr, i);
+ if (i < info->num_inputs - 1)
+ P(ctx, ", ");
+ }
+ P(ctx, ")");
+}
+
+static void
+alu_to_msl(struct nir_to_msl_ctx *ctx, nir_alu_instr *instr)
+{
+
+#define ALU_BINOP(op) \
+ do { \
+ alu_src_to_msl(ctx, instr, 0); \
+ P(ctx, " %s ", op); \
+ alu_src_to_msl(ctx, instr, 1); \
+ } while (0)
+
+ switch (instr->op) {
+ case nir_op_isign:
+ alu_src_to_msl(ctx, instr, 0);
+ P(ctx, " == 0 ? 0.0 : ((");
+ alu_src_to_msl(ctx, instr, 0);
+ P(ctx, " < 0) ? -1 : 1)");
+ break;
+ case nir_op_iadd:
+ case nir_op_fadd:
+ ALU_BINOP("+");
+ break;
+ case nir_op_uadd_sat:
+ case nir_op_iadd_sat:
+ alu_funclike(ctx, instr, "addsat");
+ break;
+ case nir_op_isub:
+ case nir_op_fsub:
+ ALU_BINOP("-");
+ break;
+ case nir_op_imul:
+ case nir_op_fmul:
+ ALU_BINOP("*");
+ break;
+ case nir_op_idiv:
+ case nir_op_udiv:
+ case nir_op_fdiv:
+ ALU_BINOP("/");
+ break;
+ case nir_op_irem:
+ ALU_BINOP("%");
+ break;
+ case nir_op_ishl:
+ ALU_BINOP("<<");
+ break;
+ case nir_op_ishr:
+ case nir_op_ushr:
+ ALU_BINOP(">>");
+ break;
+ case nir_op_ige:
+ case nir_op_uge:
+ case nir_op_fge:
+ ALU_BINOP(">=");
+ break;
+ case nir_op_ilt:
+ case nir_op_ult:
+ case nir_op_flt:
+ ALU_BINOP("<")
+ break;
+ case nir_op_iand:
+ ALU_BINOP("&");
+ break;
+ case nir_op_ior:
+ ALU_BINOP("|");
+ break;
+ case nir_op_ixor:
+ ALU_BINOP("^");
+ break;
+ case nir_op_bitfield_insert:
+ alu_funclike(ctx, instr, "insert_bits");
+ break;
+ case nir_op_ibitfield_extract:
+ case nir_op_ubitfield_extract:
+ alu_funclike(ctx, instr, "extract_bits");
+ break;
+ case nir_op_bitfield_reverse:
+ alu_funclike(ctx, instr, "reverse_bits");
+ break;
+ case nir_op_bit_count:
+ alu_funclike(ctx, instr, "popcount");
+ break;
+ case nir_op_uclz:
+ alu_funclike(ctx, instr, "clz");
+ break;
+ case nir_op_ieq:
+ case nir_op_feq:
+ ALU_BINOP("==");
+ break;
+ case nir_op_ine:
+ case nir_op_fneu:
+ ALU_BINOP("!=");
+ break;
+ case nir_op_umax:
+ case nir_op_imax:
+ alu_funclike(ctx, instr, "max");
+ break;
+ case nir_op_umin:
+ case nir_op_imin:
+ alu_funclike(ctx, instr, "min");
+ break;
+ case nir_op_umod:
+ case nir_op_imod:
+ ALU_BINOP("%");
+ break;
+ case nir_op_imul_high:
+ case nir_op_umul_high:
+ alu_funclike(ctx, instr, "mulhi");
+ break;
+ case nir_op_usub_sat:
+ alu_funclike(ctx, instr, "subsat");
+ break;
+ case nir_op_fsat:
+ alu_funclike(ctx, instr, "saturate");
+ break;
+ /* Functions from <metal_relational> */
+ case nir_op_fisfinite:
+ alu_funclike(ctx, instr, "isfinite");
+ break;
+ case nir_op_fisnormal:
+ alu_funclike(ctx, instr, "isnormal");
+ break;
+ /* Functions from <metal_math> */
+ case nir_op_iabs:
+ case nir_op_fabs:
+ alu_funclike(ctx, instr, "abs");
+ break;
+ case nir_op_fceil:
+ alu_funclike(ctx, instr, "ceil");
+ break;
+ case nir_op_fcos:
+ alu_funclike(ctx, instr, "cos");
+ break;
+ case nir_op_fdot2:
+ case nir_op_fdot3:
+ case nir_op_fdot4:
+ alu_funclike(ctx, instr, "dot");
+ break;
+ case nir_op_fexp2:
+ alu_funclike(ctx, instr, "exp2");
+ break;
+ case nir_op_ffloor:
+ alu_funclike(ctx, instr, "floor");
+ break;
+ case nir_op_ffma:
+ alu_funclike(ctx, instr, "fma");
+ break;
+ case nir_op_ffract:
+ alu_funclike(ctx, instr, "fract");
+ break;
+ case nir_op_flog2:
+ alu_funclike(ctx, instr, "log2");
+ break;
+ case nir_op_flrp:
+ alu_funclike(ctx, instr, "mix");
+ break;
+ case nir_op_fmax:
+ alu_funclike(ctx, instr, "fmax");
+ break;
+ case nir_op_fmin:
+ alu_funclike(ctx, instr, "fmin");
+ break;
+ case nir_op_frem:
+ alu_funclike(ctx, instr, "fmod");
+ break;
+ case nir_op_fpow:
+ alu_funclike(ctx, instr, "pow");
+ break;
+ case nir_op_fround_even:
+ alu_funclike(ctx, instr, "rint");
+ break;
+ case nir_op_frsq:
+ alu_funclike(ctx, instr, "rsqrt");
+ break;
+ case nir_op_fsign:
+ alu_funclike(ctx, instr, "sign");
+ break;
+ case nir_op_fsqrt:
+ alu_funclike(ctx, instr, "sqrt");
+ break;
+ case nir_op_fsin:
+ alu_funclike(ctx, instr, "sin");
+ break;
+ case nir_op_ldexp:
+ alu_funclike(ctx, instr, "ldexp");
+ break;
+ case nir_op_ftrunc:
+ alu_funclike(ctx, instr, "trunc");
+ break;
+ case nir_op_pack_snorm_4x8:
+ alu_funclike(ctx, instr, "pack_float_to_snorm4x8");
+ break;
+ case nir_op_pack_unorm_4x8:
+ alu_funclike(ctx, instr, "pack_float_to_unorm4x8");
+ break;
+ case nir_op_pack_snorm_2x16:
+ alu_funclike(ctx, instr, "pack_float_to_snorm2x16");
+ break;
+ case nir_op_pack_unorm_2x16:
+ alu_funclike(ctx, instr, "pack_float_to_unorm2x16");
+ break;
+ case nir_op_unpack_snorm_4x8:
+ alu_funclike(ctx, instr, "unpack_snorm4x8_to_float");
+ break;
+ case nir_op_unpack_unorm_4x8:
+ alu_funclike(ctx, instr, "unpack_unorm4x8_to_float");
+ break;
+ case nir_op_unpack_snorm_2x16:
+ alu_funclike(ctx, instr, "unpack_snorm2x16_to_float");
+ break;
+ case nir_op_unpack_unorm_2x16:
+ alu_funclike(ctx, instr, "unpack_unorm2x16_to_float");
+ break;
+ case nir_op_vec2:
+ case nir_op_vec3:
+ case nir_op_vec4:
+ case nir_op_b2b1:
+ case nir_op_b2b32:
+ case nir_op_b2i8:
+ case nir_op_b2i16:
+ case nir_op_b2i32:
+ case nir_op_b2i64:
+ case nir_op_b2f16:
+ case nir_op_i2f16:
+ case nir_op_u2f16:
+ case nir_op_i2f32:
+ case nir_op_u2f32:
+ case nir_op_i2i8:
+ case nir_op_i2i16:
+ case nir_op_i2i32:
+ case nir_op_i2i64:
+ case nir_op_f2i8:
+ case nir_op_f2i16:
+ case nir_op_f2i32:
+ case nir_op_f2i64:
+ case nir_op_f2u8:
+ case nir_op_f2u16:
+ case nir_op_f2u32:
+ case nir_op_f2u64:
+ case nir_op_u2u8:
+ case nir_op_u2u16:
+ case nir_op_u2u32:
+ case nir_op_u2u64:
+ case nir_op_f2f16:
+ case nir_op_f2f16_rtne:
+ case nir_op_f2f32:
+ alu_funclike(ctx, instr, msl_type_for_def(ctx->types, &instr->def));
+ break;
+ case nir_op_unpack_half_2x16_split_x:
+ P(ctx, "float(as_type<half>(ushort(t%d & 0x0000ffff)))",
+ instr->src[0].src.ssa->index);
+ break;
+ case nir_op_frcp:
+ P(ctx, "1/");
+ alu_src_to_msl(ctx, instr, 0);
+ break;
+ case nir_op_inot:
+ if (instr->src[0].src.ssa->bit_size == 1) {
+ P(ctx, "!");
+ } else {
+ P(ctx, "~");
+ }
+ alu_src_to_msl(ctx, instr, 0);
+ break;
+ case nir_op_ineg:
+ case nir_op_fneg:
+ P(ctx, "-");
+ alu_src_to_msl(ctx, instr, 0);
+ break;
+ case nir_op_mov:
+ alu_src_to_msl(ctx, instr, 0);
+ break;
+ case nir_op_b2f32:
+ alu_src_to_msl(ctx, instr, 0);
+ P(ctx, " ? 1.0 : 0.0");
+ break;
+ case nir_op_bcsel:
+ alu_src_to_msl(ctx, instr, 0);
+ P(ctx, " ? ");
+ alu_src_to_msl(ctx, instr, 1);
+ P(ctx, " : ");
+ alu_src_to_msl(ctx, instr, 2);
+ break;
+ default:
+ P(ctx, "ALU %s", nir_op_infos[instr->op].name);
+ }
+}
+
+static const char *
+texture_dim(enum glsl_sampler_dim dim)
+{
+ switch (dim) {
+ case GLSL_SAMPLER_DIM_1D:
+ return "1d";
+ case GLSL_SAMPLER_DIM_2D:
+ return "2d";
+ case GLSL_SAMPLER_DIM_3D:
+ return "3d";
+ case GLSL_SAMPLER_DIM_CUBE:
+ return "cube";
+ case GLSL_SAMPLER_DIM_BUF:
+ return "_buffer";
+ case GLSL_SAMPLER_DIM_MS:
+ return "2d_ms";
+ default:
+ fprintf(stderr, "Bad texture dim %d\n", dim);
+ assert(!"Bad texture dimension");
+ return "BAD";
+ }
+}
+
+static const char *
+tex_type_name(nir_alu_type ty)
+{
+ switch (ty) {
+ case nir_type_int16:
+ return "short";
+ case nir_type_int32:
+ return "int";
+ case nir_type_uint16:
+ return "ushort";
+ case nir_type_uint32:
+ return "uint";
+ case nir_type_float16:
+ return "half";
+ case nir_type_float32:
+ return "float";
+ default:
+ return "BAD";
+ }
+}
+
+static bool
+intrinsic_needs_dest_type(nir_intrinsic_instr *instr)
+{
+ const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
+ nir_intrinsic_op op = instr->intrinsic;
+ if (op == nir_intrinsic_decl_reg || op == nir_intrinsic_load_reg ||
+ op == nir_intrinsic_load_texture_handle_kk ||
+ op == nir_intrinsic_load_depth_texture_kk ||
+ /* Atomic swaps have a custom codegen */
+ op == nir_intrinsic_global_atomic_swap ||
+ op == nir_intrinsic_shared_atomic_swap ||
+ op == nir_intrinsic_bindless_image_atomic_swap)
+ return false;
+ return info->has_dest;
+}
+
+static const char *
+msl_pipe_format_to_msl_type(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_R16_FLOAT:
+ return "half";
+ case PIPE_FORMAT_R32_FLOAT:
+ return "float";
+ case PIPE_FORMAT_R8_UINT:
+ return "uchar";
+ case PIPE_FORMAT_R16_UINT:
+ return "ushort";
+ case PIPE_FORMAT_R32_UINT:
+ return "uint";
+ case PIPE_FORMAT_R64_UINT:
+ return "unsigned long";
+ case PIPE_FORMAT_R8_SINT:
+ return "char";
+ case PIPE_FORMAT_R16_SINT:
+ return "short";
+ case PIPE_FORMAT_R32_SINT:
+ return "int";
+ case PIPE_FORMAT_R64_SINT:
+ return "long";
+ default:
+ assert(0);
+ return "";
+ }
+}
+
+static const char *
+component_str(uint8_t num_components)
+{
+ switch (num_components) {
+ default:
+ case 1:
+ return "";
+ case 2:
+ return "2";
+ case 3:
+ return "3";
+ case 4:
+ return "4";
+ }
+}
+
+static void
+round_src_component_to_uint(struct nir_to_msl_ctx *ctx, nir_src *src,
+ char component)
+{
+ bool is_float = msl_src_is_float(ctx, src);
+ if (is_float) {
+ P(ctx, "uint(rint(");
+ }
+ src_to_msl(ctx, src);
+ P(ctx, ".%c", component);
+ if (is_float) {
+ P(ctx, "))");
+ }
+}
+
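+/* Print a texture/image coordinate. Metal takes the array layer and cube
+ * face as separate arguments, so the trailing coordinate components are
+ * emitted separately and rounded to uint when the coordinate source is
+ * float. */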
+static void
+texture_src_coord_swizzle(struct nir_to_msl_ctx *ctx, nir_src *coord,
+ uint32_t num_components, bool is_cube, bool is_array)
+{
+ src_to_msl(ctx, coord);
+
+ uint32_t coord_components =
+ num_components - (uint32_t)is_array - (uint32_t)is_cube;
+ if (coord_components < coord->ssa->num_components) {
+ const char *swizzle = "xyzw";
+ uint32_t i = 0;
+ P(ctx, ".");
+ for (i = 0; i < coord_components; i++)
+ P(ctx, "%c", swizzle[i]);
+
+ if (is_cube) {
+ P(ctx, ", ");
+ round_src_component_to_uint(ctx, coord, swizzle[i++]);
+ }
+ if (is_array) {
+ P(ctx, ", ");
+ round_src_component_to_uint(ctx, coord, swizzle[i++]);
+ }
+ }
+}
+
+static void
+image_coord_swizzle(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
+{
+ unsigned comps = 0;
+ bool is_array = nir_intrinsic_image_array(instr);
+ bool is_cube = false;
+ switch (nir_intrinsic_image_dim(instr)) {
+ case GLSL_SAMPLER_DIM_BUF:
+ case GLSL_SAMPLER_DIM_1D:
+ comps = 1;
+ break;
+ case GLSL_SAMPLER_DIM_2D:
+ case GLSL_SAMPLER_DIM_MS:
+ comps = 2;
+ break;
+ case GLSL_SAMPLER_DIM_3D:
+ comps = 3;
+ break;
+ case GLSL_SAMPLER_DIM_CUBE:
+ comps = 3;
+ is_cube = true;
+ break;
+ default:
+ assert(!"Bad dimension for image");
+ break;
+ }
+ if (is_array)
+ comps += 1;
+
+ texture_src_coord_swizzle(ctx, &instr->src[1], comps, is_cube, is_array);
+}
+
+/* Non-packed types have stricter alignment requirements than packed types.
+ * This helps us build a packed value for storage.
+ */
+static void
+src_to_packed(struct nir_to_msl_ctx *ctx, nir_src *src, const char *type,
+ uint32_t component_count)
+{
+ if (component_count == 1) {
+ P(ctx, "%s(", type);
+ } else {
+ P(ctx, "packed_%s(", type);
+ }
+ src_to_msl(ctx, src);
+ P(ctx, ")");
+}
+
+/* Non-packed types have stricter alignment requirements than packed types.
+ * This helps us cast the pointer to a packed type and then build the
+ * non-packed type for Metal usage.
+ */
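+/* For illustration: src_to_packed_load() below emits roughly
+ * "float3(*(device packed_float3*)ADDR)" for a 3-component float load and
+ * "*(device float*)(ADDR)" for a scalar one, where ADDR stands for whatever
+ * src_to_msl() prints for the address source. */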
+static void
+src_to_packed_load(struct nir_to_msl_ctx *ctx, nir_src *src,
+ const char *addressing, const char *type,
+ uint32_t component_count)
+{
+ if (component_count == 1) {
+ P(ctx, "*(%s %s*)(", addressing, type);
+ } else {
+ P(ctx, "%s(*(%s packed_%s*)", type, addressing, type);
+ }
+ src_to_msl(ctx, src);
+ P(ctx, ")");
+}
+
+/* Non-packed types have stricter alignment requirements than packed types.
+ * This helps us cast the pointer to a packed type and then build the
+ * non-packed type for Metal usage.
+ */
+static void
+src_to_packed_load_offset(struct nir_to_msl_ctx *ctx, nir_src *src,
+ nir_src *offset, const char *addressing,
+ const char *type, uint32_t component_count)
+{
+ if (component_count == 1) {
+ P(ctx, "*(%s %s*)((", addressing, type);
+ } else {
+ P(ctx, "%s(*(%s packed_%s*)(", type, addressing, type);
+ }
+ src_to_msl(ctx, src);
+ P(ctx, " + ");
+ src_to_msl(ctx, offset);
+ P(ctx, "))");
+}
+
+/* Non-packed types have stricter alignment requirements than packed types.
+ * This helps us cast the pointer to a packed type for storage.
+ */
+static void
+src_to_packed_store(struct nir_to_msl_ctx *ctx, nir_src *src,
+ const char *addressing, const char *type,
+ uint32_t num_components)
+{
+ if (num_components == 1) {
+ P_IND(ctx, "*(%s %s*)", addressing, type);
+ } else {
+ P_IND(ctx, "*(%s packed_%s*)", addressing, type);
+ }
+ src_to_msl(ctx, src);
+}
+
+static const char *
+atomic_op_to_msl(nir_atomic_op op)
+{
+ switch (op) {
+ case nir_atomic_op_iadd:
+ case nir_atomic_op_fadd:
+ return "atomic_fetch_add";
+ case nir_atomic_op_umin:
+ case nir_atomic_op_imin:
+ case nir_atomic_op_fmin:
+ return "atomic_fetch_min";
+ case nir_atomic_op_umax:
+ case nir_atomic_op_imax:
+ case nir_atomic_op_fmax:
+ return "atomic_fetch_max";
+ case nir_atomic_op_iand:
+ return "atomic_fetch_and";
+ case nir_atomic_op_ior:
+ return "atomic_fetch_or";
+ case nir_atomic_op_ixor:
+ return "atomic_fetch_xor";
+ case nir_atomic_op_xchg:
+ return "atomic_exchange";
+ case nir_atomic_op_cmpxchg:
+ case nir_atomic_op_fcmpxchg:
+ return "atomic_compare_exchange_weak";
+ default:
+ UNREACHABLE("Unhandled atomic op");
+ }
+}
+
+static void
+atomic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr,
+ const char *scope, bool shared)
+{
+ const char *atomic_op = atomic_op_to_msl(nir_intrinsic_atomic_op(instr));
+ const char *mem_order = "memory_order_relaxed";
+
+ P(ctx, "%s_explicit((%s atomic_%s*)", atomic_op, scope,
+ msl_type_for_def(ctx->types, &instr->def));
+ if (shared)
+ P(ctx, "&shared_data[");
+ src_to_msl(ctx, &instr->src[0]);
+ if (shared)
+ P(ctx, "]");
+ P(ctx, ", ");
+ src_to_msl(ctx, &instr->src[1]);
+ P(ctx, ", %s", mem_order);
+ P(ctx, ");\n");
+}
+
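+/* atomic_compare_exchange_weak_explicit() takes the expected value by
+ * pointer and writes the previous value back through it, so a temporary
+ * "ta<N>" is declared for it and the result is then copied into the SSA
+ * temporary "t<N>". */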
+static void
+atomic_swap_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr,
+ const char *scope, bool shared)
+{
+ const char *atomic_op = atomic_op_to_msl(nir_intrinsic_atomic_op(instr));
+ const char *mem_order = "memory_order_relaxed";
+ const char *type = msl_type_for_def(ctx->types, &instr->def);
+
+ P_IND(ctx, "%s ta%d = ", type, instr->def.index);
+ src_to_msl(ctx, &instr->src[1]);
+ P(ctx, "; %s_explicit((%s atomic_%s*)", atomic_op, scope, type);
+ if (shared)
+ P(ctx, "&shared_data[");
+ src_to_msl(ctx, &instr->src[0]);
+ if (shared)
+ P(ctx, "]");
+ P(ctx, ", ");
+ P(ctx, "&ta%d, ", instr->def.index);
+ src_to_msl(ctx, &instr->src[2]);
+ P(ctx, ", %s, %s);", mem_order, mem_order);
+ P(ctx, "%s t%d = ta%d;\n", type, instr->def.index, instr->def.index);
+}
+
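+/* Translate a mask of NIR variable modes into the mem_flags expression for a
+ * Metal barrier, OR-ing one flag per mode that is set. */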
+static void
+memory_modes_to_msl(struct nir_to_msl_ctx *ctx, nir_variable_mode modes)
+{
+ bool requires_or = false;
+ u_foreach_bit(i, modes) {
+ nir_variable_mode single_mode = (1 << i);
+ if (requires_or)
+ P(ctx, " | ");
+ switch (single_mode) {
+ case nir_var_image:
+ P(ctx, "mem_flags::mem_texture");
+ break;
+ case nir_var_mem_ssbo:
+ case nir_var_mem_global:
+ P(ctx, "mem_flags::mem_device");
+ break;
+ case nir_var_function_temp:
+ P(ctx, "mem_flags::mem_none");
+ break;
+ case nir_var_mem_shared:
+ P(ctx, "mem_flags::mem_threadgroup");
+ break;
+ default:
+ UNREACHABLE("bad_memory_mode");
+ }
+ requires_or = true;
+ }
+}
+
+static uint32_t
+get_input_num_components(struct nir_to_msl_ctx *ctx, uint32_t location)
+{
+ return ctx->inputs_info[location].num_components;
+}
+
+static uint32_t
+get_output_num_components(struct nir_to_msl_ctx *ctx, uint32_t location)
+{
+ return ctx->outputs_info[location].num_components;
+}
+
+static void
+intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr)
+{
+ /* These instructions are only used to determine interpolation modes; they
+ * don't generate any code. */
+ if (instr->intrinsic == nir_intrinsic_load_barycentric_pixel ||
+ instr->intrinsic == nir_intrinsic_load_barycentric_centroid ||
+ instr->intrinsic == nir_intrinsic_load_barycentric_sample)
+ return;
+
+ const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
+ if (intrinsic_needs_dest_type(instr)) {
+ P_IND(ctx, "t%d = ", instr->def.index);
+ }
+ switch (instr->intrinsic) {
+ case nir_intrinsic_decl_reg: {
+ const char *reg_type = msl_uint_type(nir_intrinsic_bit_size(instr),
+ nir_intrinsic_num_components(instr));
+ P_IND(ctx, "%s r%d = %s(0);\n", reg_type, instr->def.index, reg_type);
+ } break;
+ case nir_intrinsic_load_reg:
+ // register loads get inlined into the uses
+ break;
+ case nir_intrinsic_store_reg:
+ P_IND(ctx, "r%d", instr->src[1].ssa->index);
+ writemask_to_msl(ctx, nir_intrinsic_write_mask(instr),
+ instr->num_components);
+ /* Registers don't store the component count, so get it from the value we
+ * are assigning */
+ if (instr->src[0].ssa->bit_size == 1u) {
+ P(ctx, " = bool%s((", component_str(instr->num_components));
+ } else if (nir_src_is_const(instr->src[0])) {
+ /* Const vector types already build the type */
+ if (instr->src[0].ssa->num_components > 1) {
+ P(ctx, " = as_type<%s>((",
+ msl_uint_type(instr->src[0].ssa->bit_size,
+ instr->src[0].ssa->num_components));
+ } else {
+ P(ctx, " = as_type<%s>(%s(",
+ msl_uint_type(instr->src[0].ssa->bit_size,
+ instr->src[0].ssa->num_components),
+ msl_type_for_src(ctx->types, &instr->src[0]));
+ }
+ } else {
+ P(ctx, " = as_type<%s>((",
+ msl_uint_type(instr->src[0].ssa->bit_size,
+ instr->src[0].ssa->num_components));
+ }
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, "));\n");
+ break;
+ case nir_intrinsic_load_subgroup_size:
+ P(ctx, "gl_SubGroupSize;\n");
+ break;
+ case nir_intrinsic_load_subgroup_invocation:
+ P(ctx, "gl_SubGroupInvocation;\n");
+ break;
+ case nir_intrinsic_load_num_subgroups:
+ P(ctx, "gl_NumSubGroups;\n");
+ break;
+ case nir_intrinsic_load_subgroup_id:
+ P(ctx, "gl_SubGroupID;\n");
+ break;
+ case nir_intrinsic_load_workgroup_id:
+ P(ctx, "gl_WorkGroupID;\n");
+ break;
+ case nir_intrinsic_load_local_invocation_id:
+ P(ctx, "gl_LocalInvocationID;\n");
+ break;
+ case nir_intrinsic_load_global_invocation_id:
+ P(ctx, "gl_GlobalInvocationID;\n");
+ break;
+ case nir_intrinsic_load_num_workgroups:
+ P(ctx, "gl_NumWorkGroups;\n");
+ break;
+ case nir_intrinsic_load_local_invocation_index:
+ P(ctx, "gl_LocalInvocationIndex;\n");
+ break;
+ case nir_intrinsic_load_frag_coord:
+ P(ctx, "gl_FragCoord;\n");
+ break;
+ case nir_intrinsic_load_point_coord:
+ P(ctx, "gl_PointCoord;\n");
+ break;
+ case nir_intrinsic_load_vertex_id:
+ P(ctx, "gl_VertexID;\n");
+ break;
+ case nir_intrinsic_load_instance_id:
+ P(ctx, "gl_InstanceID;\n");
+ break;
+ case nir_intrinsic_load_base_instance:
+ P(ctx, "gl_BaseInstance;\n");
+ break;
+ case nir_intrinsic_load_helper_invocation:
+ P(ctx, "gl_HelperInvocation;\n");
+ break;
+ case nir_intrinsic_is_helper_invocation:
+ P(ctx, "simd_is_helper_thread();\n");
+ break;
+ case nir_intrinsic_ddx:
+ case nir_intrinsic_ddx_coarse:
+ case nir_intrinsic_ddx_fine:
+ P(ctx, "dfdx(");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ");\n");
+ break;
+ case nir_intrinsic_ddy:
+ case nir_intrinsic_ddy_coarse:
+ case nir_intrinsic_ddy_fine:
+ P(ctx, "dfdy(");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ");\n");
+ break;
+ case nir_intrinsic_load_front_face:
+ P(ctx, "gl_FrontFacing;\n");
+ break;
+ case nir_intrinsic_load_layer_id:
+ P(ctx, "gl_Layer;\n");
+ break;
+ case nir_intrinsic_load_sample_id:
+ P(ctx, "gl_SampleID;\n");
+ break;
+ case nir_intrinsic_load_sample_mask_in:
+ P(ctx, "gl_SampleMask;\n");
+ break;
+ case nir_intrinsic_load_amplification_id_kk:
+ P(ctx, "mtl_AmplificationID;\n");
+ break;
+ case nir_intrinsic_load_interpolated_input: {
+ unsigned idx = nir_src_as_uint(instr->src[1u]);
+ nir_io_semantics io = nir_intrinsic_io_semantics(instr);
+ uint32_t component = nir_intrinsic_component(instr);
+ uint32_t location = io.location + idx;
+ P(ctx, "in.%s", msl_input_name(ctx, location));
+ if (instr->num_components < get_input_num_components(ctx, location)) {
+ P(ctx, ".");
+ for (unsigned i = 0; i < instr->num_components; i++)
+ P(ctx, "%c", "xyzw"[component + i]);
+ }
+ P(ctx, ";\n");
+ break;
+ }
+ case nir_intrinsic_load_input: {
+ unsigned idx = nir_src_as_uint(instr->src[0u]);
+ nir_io_semantics io = nir_intrinsic_io_semantics(instr);
+ uint32_t component = nir_intrinsic_component(instr);
+ uint32_t location = io.location + idx;
+ P(ctx, "in.%s", msl_input_name(ctx, location));
+ if (instr->num_components < get_input_num_components(ctx, location)) {
+ P(ctx, ".");
+ for (unsigned i = 0; i < instr->num_components; i++)
+ P(ctx, "%c", "xyzw"[component + i]);
+ }
+ P(ctx, ";\n");
+ break;
+ }
+ case nir_intrinsic_load_output: {
+ unsigned idx = nir_src_as_uint(instr->src[0]);
+ nir_io_semantics io = nir_intrinsic_io_semantics(instr);
+ P(ctx, "out.%s;\n", msl_output_name(ctx, io.location + idx));
+ break;
+ }
+ case nir_intrinsic_store_output: {
+ uint32_t idx = nir_src_as_uint(instr->src[1]);
+ nir_io_semantics io = nir_intrinsic_io_semantics(instr);
+ uint32_t location = io.location + idx;
+ uint32_t write_mask = nir_intrinsic_write_mask(instr);
+ uint32_t component = nir_intrinsic_component(instr);
+ uint32_t dst_num_components = get_output_num_components(ctx, location);
+ uint32_t num_components = instr->num_components;
+
+ P_IND(ctx, "out.%s", msl_output_name(ctx, location));
+ if (dst_num_components > 1u) {
+ P(ctx, ".");
+ for (unsigned i = 0; i < num_components; i++)
+ if ((write_mask >> i) & 1)
+ P(ctx, "%c", "xyzw"[component + i]);
+ }
+ P(ctx, " = ");
+ src_to_msl(ctx, &instr->src[0]);
+ if (num_components > 1u) {
+ P(ctx, ".");
+ for (unsigned i = 0; i < num_components; i++)
+ if ((write_mask >> i) & 1)
+ P(ctx, "%c", "xyzw"[i]);
+ }
+ P(ctx, ";\n");
+ break;
+ }
+ case nir_intrinsic_load_push_constant: {
+ const char *ty = msl_type_for_def(ctx->types, &instr->def);
+ assert(nir_intrinsic_base(instr) == 0);
+ P(ctx, "*((constant %s*)&buf.push_consts[", ty);
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, "]);\n");
+ break;
+ }
+ case nir_intrinsic_load_buffer_ptr_kk:
+ P(ctx, "(ulong)&buf%d.contents[0];\n", nir_intrinsic_binding(instr));
+ break;
+ case nir_intrinsic_load_global: {
+ src_to_packed_load(ctx, &instr->src[0], "device",
+ msl_type_for_def(ctx->types, &instr->def),
+ instr->def.num_components);
+ P(ctx, ";\n");
+ break;
+ }
+ case nir_intrinsic_load_global_constant: {
+ src_to_packed_load(ctx, &instr->src[0], "constant",
+ msl_type_for_def(ctx->types, &instr->def),
+ instr->def.num_components);
+ P(ctx, ";\n");
+ break;
+ }
+ case nir_intrinsic_load_global_constant_bounded: {
+ src_to_msl(ctx, &instr->src[1]);
+ P(ctx, " < ");
+ src_to_msl(ctx, &instr->src[2]);
+ P(ctx, " ? ");
+ src_to_packed_load_offset(ctx, &instr->src[0], &instr->src[1], "constant",
+ msl_type_for_def(ctx->types, &instr->def),
+ instr->def.num_components);
+ P(ctx, " : 0;\n");
+ break;
+ }
+ case nir_intrinsic_load_global_constant_offset: {
+ src_to_packed_load_offset(ctx, &instr->src[0], &instr->src[1], "device",
+ msl_type_for_def(ctx->types, &instr->def),
+ instr->def.num_components);
+ P(ctx, ";\n");
+ break;
+ }
+ case nir_intrinsic_global_atomic:
+ atomic_to_msl(ctx, instr, "device", false);
+ break;
+ case nir_intrinsic_global_atomic_swap:
+ atomic_swap_to_msl(ctx, instr, "device", false);
+ break;
+ case nir_intrinsic_shared_atomic:
+ atomic_to_msl(ctx, instr, "threadgroup", true);
+ break;
+ case nir_intrinsic_shared_atomic_swap:
+ atomic_swap_to_msl(ctx, instr, "threadgroup", true);
+ break;
+ case nir_intrinsic_store_global: {
+ const char *type = msl_type_for_src(ctx->types, &instr->src[0]);
+ src_to_packed_store(ctx, &instr->src[1], "device", type,
+ instr->src[0].ssa->num_components);
+ writemask_to_msl(ctx, nir_intrinsic_write_mask(instr),
+ instr->num_components);
+ P(ctx, " = ")
+ src_to_packed(ctx, &instr->src[0], type,
+ instr->src[0].ssa->num_components);
+ P(ctx, ";\n");
+ break;
+ }
+ case nir_intrinsic_barrier: {
+ mesa_scope execution_scope = nir_intrinsic_execution_scope(instr);
+ nir_variable_mode memory_modes = nir_intrinsic_memory_modes(instr);
+ if (execution_scope == SCOPE_SUBGROUP) {
+ P_IND(ctx, "simdgroup_barrier(");
+ memory_modes_to_msl(ctx, memory_modes);
+ } else if (execution_scope == SCOPE_WORKGROUP) {
+ P_IND(ctx, "threadgroup_barrier(");
+ memory_modes_to_msl(ctx, memory_modes);
+ } else if (execution_scope == SCOPE_NONE) {
+ /* Empty barrier */
+ if (memory_modes == 0u)
+ break;
+
+ P_IND(ctx, "atomic_thread_fence(");
+ memory_modes_to_msl(ctx, memory_modes);
+ P(ctx, ", memory_order_seq_cst, ");
+ switch (nir_intrinsic_memory_scope(instr)) {
+ case SCOPE_SUBGROUP:
+ P(ctx, "thread_scope::thread_scope_simdgroup");
+ break;
+ case SCOPE_WORKGROUP:
+ /* TODO_KOSMICKRISP This if case should not be needed but we fail
+ * the following CTS tests otherwise:
+ * dEQP-VK.memory_model.*.ext.u32.*coherent.*.atomicwrite.workgroup.payload_*local.*.guard_local.*.comp
+ * The last two wild cards being either 'workgroup' or 'physbuffer'
+ */
+ if (memory_modes &
+ (nir_var_mem_global | nir_var_mem_ssbo | nir_var_image)) {
+ P(ctx, "thread_scope::thread_scope_device");
+ } else {
+ P(ctx, "thread_scope::thread_scope_threadgroup");
+ }
+
+ break;
+ case SCOPE_QUEUE_FAMILY:
+ case SCOPE_DEVICE:
+ P(ctx, "thread_scope::thread_scope_device");
+ break;
+ default:
+ P(ctx, "bad_scope");
+ assert(!"bad scope");
+ break;
+ }
+ } else {
+ UNREACHABLE("bad_execution scope");
+ }
+ P(ctx, ");\n");
+ break;
+ }
+ case nir_intrinsic_demote:
+ P_IND(ctx, "discard_fragment();\n");
+ break;
+ case nir_intrinsic_demote_if:
+ P_IND(ctx, "if (")
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ")\n");
+ ctx->indentlevel++;
+ P_IND(ctx, "discard_fragment();\n");
+ ctx->indentlevel--;
+ break;
+ case nir_intrinsic_terminate:
+ P_IND(ctx, "discard_fragment();\n");
+ P_IND(ctx, "return {};\n");
+ break;
+ case nir_intrinsic_terminate_if:
+ P_IND(ctx, "if (");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ") {\n");
+ ctx->indentlevel++;
+ P_IND(ctx, "discard_fragment();\n");
+ P_IND(ctx, "return {};\n");
+ ctx->indentlevel--;
+ P_IND(ctx, "}\n");
+ break;
+ case nir_intrinsic_load_shared:
+ assert(nir_intrinsic_base(instr) == 0);
+ P(ctx, "*(threadgroup %s*)&shared_data[",
+ msl_type_for_def(ctx->types, &instr->def));
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, "];\n");
+ break;
+ case nir_intrinsic_store_shared:
+ assert(nir_intrinsic_base(instr) == 0);
+ P_IND(ctx, "(*(threadgroup %s*)&shared_data[",
+ msl_type_for_src(ctx->types, &instr->src[0]));
+ src_to_msl(ctx, &instr->src[1]);
+ P(ctx, "])");
+ writemask_to_msl(ctx, nir_intrinsic_write_mask(instr),
+ instr->num_components);
+ P(ctx, " = ");
+ src_to_msl(ctx, &instr->src[0]);
+ if (instr->src[0].ssa->num_components > 1)
+ writemask_to_msl(ctx, nir_intrinsic_write_mask(instr),
+ instr->num_components);
+ P(ctx, ";\n");
+ break;
+ case nir_intrinsic_load_scratch:
+ P(ctx, "*(thread %s*)&scratch[",
+ msl_type_for_def(ctx->types, &instr->def));
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, "];\n");
+ break;
+ case nir_intrinsic_store_scratch:
+ P_IND(ctx, "(*(thread %s*)&scratch[",
+ msl_type_for_src(ctx->types, &instr->src[0]));
+ src_to_msl(ctx, &instr->src[1]);
+ P(ctx, "])");
+ writemask_to_msl(ctx, nir_intrinsic_write_mask(instr),
+ instr->num_components);
+ P(ctx, " = ");
+ src_to_msl(ctx, &instr->src[0]);
+ if (instr->src[0].ssa->num_components > 1)
+ writemask_to_msl(ctx, nir_intrinsic_write_mask(instr),
+ instr->num_components);
+ P(ctx, ";\n");
+ break;
+ case nir_intrinsic_load_texture_handle_kk: {
+ const char *access = "";
+ switch (nir_intrinsic_flags(instr)) {
+ case MSL_ACCESS_READ:
+ access = ", access::read";
+ break;
+ case MSL_ACCESS_WRITE:
+ access = ", access::write";
+ break;
+ case MSL_ACCESS_READ_WRITE:
+ access = ", access::read_write";
+ break;
+ }
+ P_IND(ctx, "texture%s%s<%s%s> t%d = *(constant texture%s%s<%s%s>*)",
+ texture_dim(nir_intrinsic_image_dim(instr)),
+ nir_intrinsic_image_array(instr) ? "_array" : "",
+ tex_type_name(nir_intrinsic_dest_type(instr)), access,
+ instr->def.index, texture_dim(nir_intrinsic_image_dim(instr)),
+ nir_intrinsic_image_array(instr) ? "_array" : "",
+ tex_type_name(nir_intrinsic_dest_type(instr)), access);
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ";\n");
+ break;
+ }
+ case nir_intrinsic_load_depth_texture_kk:
+ P_IND(ctx, "depth%s%s<float> t%d = *(constant depth%s%s<float>*)",
+ texture_dim(nir_intrinsic_image_dim(instr)),
+ nir_intrinsic_image_array(instr) ? "_array" : "", instr->def.index,
+ texture_dim(nir_intrinsic_image_dim(instr)),
+ nir_intrinsic_image_array(instr) ? "_array" : "");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ";\n");
+ break;
+ case nir_intrinsic_load_sampler_handle_kk:
+ P(ctx, "sampler_table.handles[");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, "];\n");
+ break;
+ case nir_intrinsic_load_constant_agx: {
+ const char *type = msl_type_for_def(ctx->types, &instr->def);
+ const char *no_component_type =
+ msl_pipe_format_to_msl_type(nir_intrinsic_format(instr));
+ if (instr->def.num_components == 1) {
+ P(ctx, "(*(((constant %s*)", type);
+ } else {
+ P(ctx, "%s(*(constant packed_%s*)(((constant %s*)", type, type,
+ no_component_type);
+ }
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ") + ");
+ src_to_msl(ctx, &instr->src[1]);
+ P(ctx, "));\n");
+ break;
+ }
+ case nir_intrinsic_bindless_image_load:
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ".read(");
+ image_coord_swizzle(ctx, instr);
+ if (nir_intrinsic_image_dim(instr) != GLSL_SAMPLER_DIM_BUF) {
+ P(ctx, ", ");
+ src_to_msl(ctx, &instr->src[3]);
+ }
+ /* read() always returns a 4-component vector, and we may try to assign
+ * that to a uint, which is illegal. */
+ P(ctx, ").");
+ for (uint32_t i = 0u; i < instr->def.num_components; ++i) {
+ P(ctx, "%c", "xyzw"[i]);
+ }
+ P(ctx, ";\n");
+ break;
+ case nir_intrinsic_bindless_image_store:
+ P_INDENT(ctx);
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ".write(");
+ src_to_msl(ctx, &instr->src[3]);
+ P(ctx, ", ");
+ image_coord_swizzle(ctx, instr);
+ if (nir_intrinsic_image_dim(instr) != GLSL_SAMPLER_DIM_BUF) {
+ P(ctx, ", ");
+ src_to_msl(ctx, &instr->src[4]);
+ }
+ P(ctx, ");\n");
+ break;
+ case nir_intrinsic_bindless_image_atomic:
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ".%s(", atomic_op_to_msl(nir_intrinsic_atomic_op(instr)));
+ image_coord_swizzle(ctx, instr);
+ P(ctx, ", ");
+ src_to_msl(ctx, &instr->src[3]);
+ P(ctx, ").x;\n");
+ break;
+ case nir_intrinsic_bindless_image_atomic_swap: {
+ const char *type = msl_type_for_def(ctx->types, &instr->def);
+ P_IND(ctx, "%s4 ta%d = ", type, instr->def.index);
+ src_to_msl(ctx, &instr->src[3]);
+ P(ctx, "; ");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ".%s(", atomic_op_to_msl(nir_intrinsic_atomic_op(instr)));
+ image_coord_swizzle(ctx, instr);
+ P(ctx, ", &ta%d, ", instr->def.index);
+ src_to_msl(ctx, &instr->src[4]);
+ P(ctx, "); %s t%d = ta%d.x;\n", type, instr->def.index, instr->def.index);
+ break;
+ }
+ case nir_intrinsic_ballot:
+ P(ctx, "(ulong)simd_ballot(");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ");\n");
+ break;
+ case nir_intrinsic_elect:
+ /* If we don't add "&& (ulong)simd_ballot(true)", the following CTS tests
+ * fail:
+ * dEQP-VK.subgroups.ballot_other.graphics.subgroupballotfindlsb
+ * dEQP-VK.subgroups.ballot_other.compute.subgroupballotfindlsb
+ * Weird Metal bug:
+ * if (simd_is_first())
+ * temp = 3u;
+ * else
+ * temp = simd_ballot(true); <- This will return all active threads...
+ */
+ P(ctx, "simd_is_first() && (ulong)simd_ballot(true);\n");
+ break;
+ case nir_intrinsic_read_first_invocation:
+ P(ctx, "simd_broadcast_first(");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ");\n");
+ break;
+ case nir_intrinsic_read_invocation:
+ P(ctx, "simd_broadcast(");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ", ");
+ src_to_msl(ctx, &instr->src[1]);
+ P(ctx, ");");
+ break;
+ case nir_intrinsic_shuffle:
+ P(ctx, "simd_shuffle(");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ", ");
+ src_to_msl(ctx, &instr->src[1]);
+ P(ctx, ");\n");
+ break;
+ case nir_intrinsic_shuffle_xor:
+ P(ctx, "simd_shuffle_xor(");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ", ");
+ src_to_msl(ctx, &instr->src[1]);
+ P(ctx, ");\n");
+ break;
+ case nir_intrinsic_shuffle_up:
+ P(ctx, "simd_shuffle_up(");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ", ");
+ src_to_msl(ctx, &instr->src[1]);
+ P(ctx, ");\n");
+ break;
+ case nir_intrinsic_shuffle_down:
+ P(ctx, "simd_shuffle_down(");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ", ");
+ src_to_msl(ctx, &instr->src[1]);
+ P(ctx, ");\n");
+ break;
+
+ case nir_intrinsic_vote_all:
+ P(ctx, "simd_all(");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ");\n");
+ break;
+ case nir_intrinsic_vote_any:
+ P(ctx, "simd_any(");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ");\n");
+ break;
+ case nir_intrinsic_quad_broadcast:
+ P(ctx, "quad_broadcast(");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ", ");
+ src_to_msl(ctx, &instr->src[1]);
+ P(ctx, ");\n");
+ break;
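+   /* Quad swaps map onto quad_shuffle_xor: XOR with 1 swaps horizontally,
+    * XOR with 2 vertically, and XOR with 3 diagonally within the 2x2 quad. */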
+ case nir_intrinsic_quad_swap_horizontal:
+ P(ctx, "quad_shuffle_xor(");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ", 1);\n");
+ break;
+ case nir_intrinsic_quad_swap_vertical:
+ P(ctx, "quad_shuffle_xor(");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ", 2);\n");
+ break;
+ case nir_intrinsic_quad_swap_diagonal:
+ P(ctx, "quad_shuffle_xor(");
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ", 3);\n");
+ break;
+ case nir_intrinsic_reduce:
+ switch (nir_intrinsic_reduction_op(instr)) {
+ case nir_op_iadd:
+ case nir_op_fadd:
+ P(ctx, "simd_sum(");
+ break;
+ case nir_op_imul:
+ case nir_op_fmul:
+ P(ctx, "simd_product(");
+ break;
+ case nir_op_imin:
+ case nir_op_umin:
+ case nir_op_fmin:
+ P(ctx, "simd_min(");
+ break;
+ case nir_op_imax:
+ case nir_op_umax:
+ case nir_op_fmax:
+ P(ctx, "simd_max(");
+ break;
+ case nir_op_iand:
+ P(ctx, "simd_and(");
+ break;
+ case nir_op_ior:
+ P(ctx, "simd_or(");
+ break;
+ case nir_op_ixor:
+ P(ctx, "simd_xor(");
+ break;
+ default:
+ UNREACHABLE("Bad reduction op");
+ }
+
+ src_to_msl(ctx, &instr->src[0]);
+ P(ctx, ");\n");
+ break;
+ default:
+ P_IND(ctx, "Unknown intrinsic %s\n", info->name);
+ }
+}
+
+static nir_src *
+nir_tex_get_src(struct nir_tex_instr *tex, nir_tex_src_type type)
+{
+ int idx = nir_tex_instr_src_index(tex, type);
+ if (idx == -1)
+ return NULL;
+ return &tex->src[idx].src;
+}
+
+static void
+tex_coord_swizzle(struct nir_to_msl_ctx *ctx, nir_tex_instr *tex)
+{
+ texture_src_coord_swizzle(ctx, nir_tex_get_src(tex, nir_tex_src_coord),
+ tex->coord_components, false, tex->is_array);
+}
+
+static void
+tex_to_msl(struct nir_to_msl_ctx *ctx, nir_tex_instr *tex)
+{
+ nir_src *texhandle = nir_tex_get_src(tex, nir_tex_src_texture_handle);
+ nir_src *sampler = nir_tex_get_src(tex, nir_tex_src_sampler_handle);
+ // Projectors have to be lowered away to regular arithmetic
+ assert(!nir_tex_get_src(tex, nir_tex_src_projector));
+
+ P_IND(ctx, "t%d = ", tex->def.index);
+
+ switch (tex->op) {
+ case nir_texop_tex:
+ case nir_texop_txb:
+ case nir_texop_txl:
+ case nir_texop_txd: {
+ nir_src *bias = nir_tex_get_src(tex, nir_tex_src_bias);
+ nir_src *lod = nir_tex_get_src(tex, nir_tex_src_lod);
+ nir_src *ddx = nir_tex_get_src(tex, nir_tex_src_ddx);
+ nir_src *ddy = nir_tex_get_src(tex, nir_tex_src_ddy);
+ nir_src *min_lod_clamp = nir_tex_get_src(tex, nir_tex_src_min_lod);
+ nir_src *offset = nir_tex_get_src(tex, nir_tex_src_offset);
+ nir_src *comparator = nir_tex_get_src(tex, nir_tex_src_comparator);
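+      /* Emit sample()/sample_compare() with the optional arguments (bias,
+       * level, gradient, min_lod_clamp) before the offset, matching the MSL
+       * sample() overloads. */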
+ src_to_msl(ctx, texhandle);
+ if (comparator) {
+ P(ctx, ".sample_compare(");
+ } else {
+ P(ctx, ".sample(");
+ }
+ src_to_msl(ctx, sampler);
+ P(ctx, ", ");
+ tex_coord_swizzle(ctx, tex);
+ if (comparator) {
+ P(ctx, ", ");
+ src_to_msl(ctx, comparator);
+ }
+ if (bias) {
+ P(ctx, ", bias(");
+ src_to_msl(ctx, bias);
+ P(ctx, ")");
+ }
+ if (lod) {
+ P(ctx, ", level(");
+ src_to_msl(ctx, lod);
+ P(ctx, ")");
+ }
+ if (ddx) {
+ P(ctx, ", gradient%s(", texture_dim(tex->sampler_dim));
+ src_to_msl(ctx, ddx);
+ P(ctx, ", ");
+ src_to_msl(ctx, ddy);
+ P(ctx, ")");
+ }
+ if (min_lod_clamp) {
+ P(ctx, ", min_lod_clamp(");
+ src_to_msl(ctx, min_lod_clamp);
+ P(ctx, ")");
+ }
+ if (offset) {
+ P(ctx, ", ");
+ src_to_msl(ctx, offset);
+ }
+ P(ctx, ");\n");
+ break;
+ }
+ case nir_texop_txf: {
+ src_to_msl(ctx, texhandle);
+ P(ctx, ".read(");
+ tex_coord_swizzle(ctx, tex);
+ nir_src *lod = nir_tex_get_src(tex, nir_tex_src_lod);
+ if (lod) {
+ P(ctx, ", ");
+ src_to_msl(ctx, lod);
+ }
+ P(ctx, ");\n");
+ break;
+ }
+ case nir_texop_txf_ms:
+ src_to_msl(ctx, texhandle);
+ P(ctx, ".read(");
+ tex_coord_swizzle(ctx, tex);
+ P(ctx, ", ");
+ src_to_msl(ctx, nir_tex_get_src(tex, nir_tex_src_ms_index));
+ P(ctx, ");\n");
+ break;
+ case nir_texop_txs: {
+ nir_src *lod = nir_tex_get_src(tex, nir_tex_src_lod);
+ if (tex->def.num_components > 1u) {
+ P(ctx, "%s%d(", tex_type_name(tex->dest_type),
+ tex->def.num_components);
+ } else {
+ P(ctx, "%s(", tex_type_name(tex->dest_type));
+ }
+ src_to_msl(ctx, texhandle);
+ P(ctx, ".get_width(")
+ if (lod && tex->sampler_dim != GLSL_SAMPLER_DIM_MS &&
+ tex->sampler_dim != GLSL_SAMPLER_DIM_BUF)
+ src_to_msl(ctx, lod);
+ P(ctx, ")");
+ if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D &&
+ tex->sampler_dim != GLSL_SAMPLER_DIM_BUF) {
+ P(ctx, ", ");
+ src_to_msl(ctx, texhandle);
+ P(ctx, ".get_height(");
+ if (lod && tex->sampler_dim != GLSL_SAMPLER_DIM_MS &&
+ tex->sampler_dim != GLSL_SAMPLER_DIM_BUF)
+ src_to_msl(ctx, lod);
+ P(ctx, ")");
+ }
+ if (tex->sampler_dim == GLSL_SAMPLER_DIM_3D) {
+ P(ctx, ", ");
+ src_to_msl(ctx, texhandle);
+ P(ctx, ".get_depth(");
+ if (lod)
+ src_to_msl(ctx, lod);
+ P(ctx, ")");
+ }
+ if (tex->is_array) {
+ P(ctx, ", ");
+ src_to_msl(ctx, texhandle);
+ P(ctx, ".get_array_size()");
+ }
+ P(ctx, ");\n")
+ break;
+ }
+ case nir_texop_query_levels:
+ src_to_msl(ctx, texhandle);
+ P(ctx, ".get_num_mip_levels();\n");
+ break;
+ case nir_texop_tg4: {
+ nir_src *offset = nir_tex_get_src(tex, nir_tex_src_offset);
+ nir_src *comparator = nir_tex_get_src(tex, nir_tex_src_comparator);
+ src_to_msl(ctx, texhandle);
+ if (comparator) {
+ P(ctx, ".gather_compare(");
+ } else {
+ P(ctx, ".gather(");
+ }
+ src_to_msl(ctx, sampler);
+ P(ctx, ", ");
+ tex_coord_swizzle(ctx, tex);
+ if (comparator) {
+ P(ctx, ", ");
+ src_to_msl(ctx, comparator);
+ }
+ P(ctx, ", ");
+ if (offset)
+ src_to_msl(ctx, offset);
+ else
+ P(ctx, "int2(0)");
+
+ /* Non-depth textures require component */
+ if (!comparator) {
+ P(ctx, ", component::%c", "xyzw"[tex->component]);
+ }
+
+ P(ctx, ");\n");
+ break;
+ }
+
+ case nir_texop_texture_samples:
+ src_to_msl(ctx, texhandle);
+ P(ctx, ".get_num_samples();\n");
+ break;
+ case nir_texop_lod: {
+ nir_src *coord = nir_tex_get_src(tex, nir_tex_src_coord);
+ nir_src *bias = nir_tex_get_src(tex, nir_tex_src_bias);
+ nir_src *min = nir_tex_get_src(tex, nir_tex_src_min_lod);
+ nir_src *max = nir_tex_get_src(tex, nir_tex_src_max_lod_kk);
+ P(ctx, "float2(round(clamp(")
+ src_to_msl(ctx, texhandle);
+ P(ctx, ".calculate_unclamped_lod(");
+ src_to_msl(ctx, sampler);
+ P(ctx, ", ");
+ src_to_msl(ctx, coord);
+ P(ctx, ") + ");
+ src_to_msl(ctx, bias);
+ P(ctx, ", ");
+ src_to_msl(ctx, min);
+ P(ctx, ", ");
+ src_to_msl(ctx, max);
+ P(ctx, ")), ");
+ src_to_msl(ctx, texhandle);
+ P(ctx, ".calculate_unclamped_lod(");
+ src_to_msl(ctx, sampler);
+ P(ctx, ", ");
+ src_to_msl(ctx, coord);
+ P(ctx, ")");
+ P(ctx, ");\n");
+ break;
+ }
+ default:
+ assert(!"Unsupported texture op");
+ }
+}
+
+static void
+jump_instr_to_msl(struct nir_to_msl_ctx *ctx, nir_jump_instr *jump)
+{
+ switch (jump->type) {
+ case nir_jump_halt:
+ P_IND(ctx, "TODO: halt\n");
+ assert(!"Unimplemented");
+ break;
+ case nir_jump_break:
+ P_IND(ctx, "break;\n");
+ break;
+ case nir_jump_continue:
+ P_IND(ctx, "continue;\n");
+ break;
+ case nir_jump_return:
+ assert(!"functions should have been inlined by now");
+ break;
+ case nir_jump_goto:
+ case nir_jump_goto_if:
+ assert(!"Unstructured control flow not supported");
+ break;
+ }
+}
+
+static void
+instr_to_msl(struct nir_to_msl_ctx *ctx, nir_instr *instr)
+{
+ switch (instr->type) {
+ case nir_instr_type_alu: {
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ P_IND(ctx, "t%d = ", alu->def.index);
+ alu_to_msl(ctx, alu);
+ P(ctx, ";\n");
+ break;
+ }
+ case nir_instr_type_deref:
+ assert(!"We should have lowered derefs by now");
+ break;
+ case nir_instr_type_call:
+ assert(!"We should have inlined all functions by now");
+ break;
+ case nir_instr_type_tex:
+ tex_to_msl(ctx, nir_instr_as_tex(instr));
+ break;
+ case nir_instr_type_intrinsic:
+ intrinsic_to_msl(ctx, nir_instr_as_intrinsic(instr));
+ break;
+ case nir_instr_type_load_const:
+ // consts get inlined into their uses
+ break;
+ case nir_instr_type_jump:
+ jump_instr_to_msl(ctx, nir_instr_as_jump(instr));
+ break;
+ case nir_instr_type_undef:
+ // undefs get inlined into their uses (and we shouldn't see them hopefully)
+ break;
+ case nir_instr_type_phi:
+ case nir_instr_type_parallel_copy:
+ assert(!"NIR should be taken out of SSA");
+ break;
+ }
+}
+
+static void
+cf_node_to_metal(struct nir_to_msl_ctx *ctx, nir_cf_node *node)
+{
+ switch (node->type) {
+ case nir_cf_node_block: {
+ nir_block *block = nir_cf_node_as_block(node);
+ nir_foreach_instr(instr, block) {
+ instr_to_msl(ctx, instr);
+ }
+ break;
+ }
+ case nir_cf_node_if: {
+ nir_if *ifnode = nir_cf_node_as_if(node);
+ P_IND(ctx, "if (");
+ src_to_msl(ctx, &ifnode->condition);
+ P(ctx, ") {\n");
+ ctx->indentlevel++;
+ foreach_list_typed(nir_cf_node, node, node, &ifnode->then_list) {
+ cf_node_to_metal(ctx, node);
+ }
+ ctx->indentlevel--;
+ if (!nir_cf_list_is_empty_block(&ifnode->else_list)) {
+ P_IND(ctx, "} else {\n");
+ ctx->indentlevel++;
+ foreach_list_typed(nir_cf_node, node, node, &ifnode->else_list) {
+ cf_node_to_metal(ctx, node);
+ }
+ ctx->indentlevel--;
+ }
+ P_IND(ctx, "}\n");
+ break;
+ }
+ case nir_cf_node_loop: {
+ nir_loop *loop = nir_cf_node_as_loop(node);
+ assert(!nir_loop_has_continue_construct(loop));
+      /* We need to emit an "infinite" loop this way because the MSL compiler
+       * crashes on code like the following (simplified version):
+       * // clang-format off
+       * while (true) {
+       *    if (some_conditional) {
+       *       break_loop = true;
+       *    } else {
+       *       break_loop = false;
+       *    }
+       *    if (break_loop) {
+       *       break;
+       *    }
+       * }
+       * // clang-format on
+       * The issue, we believe, is that some_conditional never changes between
+       * iterations (e.g. it keeps fetching the same value from a buffer), and
+       * the MSL compiler dislikes that enough to crash.
+       * The for loop below tricks the MSL compiler into believing we are not
+       * writing an infinite loop (wink wink).
+       */
+ P_IND(ctx,
+ "for (uint64_t no_crash = 0u; no_crash < %" PRIu64
+ "; ++no_crash) {\n",
+ UINT64_MAX);
+ ctx->indentlevel++;
+ foreach_list_typed(nir_cf_node, node, node, &loop->body) {
+ cf_node_to_metal(ctx, node);
+ }
+ ctx->indentlevel--;
+ P_IND(ctx, "}\n");
+ break;
+ }
+ case nir_cf_node_function:
+ assert(!"All functions are supposed to be inlined");
+ }
+}
+
+static void
+emit_output_return(struct nir_to_msl_ctx *ctx, nir_shader *shader)
+{
+ if (shader->info.stage == MESA_SHADER_VERTEX ||
+ shader->info.stage == MESA_SHADER_FRAGMENT)
+ P_IND(ctx, "return out;\n");
+}
+
+static void
+rename_main_entrypoint(struct nir_shader *nir)
+{
+   /* Rename the entrypoint, now that all other functions have been removed,
+    * to avoid MSL limitations. We don't really care what it's named as long
+    * as it's not "main".
+    */
+ const char *entrypoint_name = "main_entrypoint";
+ nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);
+ struct nir_function *function = entrypoint->function;
+ ralloc_free((void *)function->name);
+ function->name = ralloc_strdup(function, entrypoint_name);
+}
+
+static bool
+kk_scalarize_filter(const nir_instr *instr, const void *data)
+{
+   return instr->type == nir_instr_type_alu;
+}
+
+void
+msl_preprocess_nir(struct nir_shader *nir)
+{
+ /* First, inline away all the functions */
+ NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp);
+ NIR_PASS(_, nir, nir_lower_returns);
+ NIR_PASS(_, nir, nir_inline_functions);
+ NIR_PASS(_, nir, nir_opt_deref);
+ nir_remove_non_entrypoints(nir);
+
+ NIR_PASS(_, nir, nir_lower_global_vars_to_local);
+ NIR_PASS(_, nir, nir_split_var_copies);
+ NIR_PASS(_, nir, nir_split_struct_vars, nir_var_function_temp);
+ NIR_PASS(_, nir, nir_split_array_vars, nir_var_function_temp);
+ NIR_PASS(_, nir, nir_split_per_member_structs);
+ NIR_PASS(_, nir, nir_lower_continue_constructs);
+
+ NIR_PASS(_, nir, nir_lower_frexp);
+
+ NIR_PASS(_, nir, nir_lower_vars_to_ssa);
+ NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
+ if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ nir_input_attachment_options input_attachment_options = {
+ .use_fragcoord_sysval = true,
+ .use_layer_id_sysval = true,
+ };
+ NIR_PASS(_, nir, nir_lower_input_attachments, &input_attachment_options);
+ }
+ NIR_PASS(_, nir, nir_opt_combine_barriers, NULL, NULL);
+ NIR_PASS(_, nir, nir_lower_var_copies);
+ NIR_PASS(_, nir, nir_split_var_copies);
+
+ NIR_PASS(_, nir, nir_split_array_vars,
+ nir_var_function_temp | nir_var_shader_in | nir_var_shader_out);
+ NIR_PASS(_, nir, nir_lower_alu_to_scalar, kk_scalarize_filter, NULL);
+
+ NIR_PASS(_, nir, nir_lower_indirect_derefs,
+ nir_var_shader_in | nir_var_shader_out, UINT32_MAX);
+ NIR_PASS(_, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 0,
+ glsl_get_natural_size_align_bytes,
+ glsl_get_natural_size_align_bytes);
+
+ NIR_PASS(_, nir, nir_lower_system_values);
+
+ nir_lower_compute_system_values_options csv_options = {
+ .has_base_global_invocation_id = 0,
+ .has_base_workgroup_id = true,
+ };
+ NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options);
+
+ msl_nir_lower_subgroups(nir);
+}
+
+bool
+msl_optimize_nir(struct nir_shader *nir)
+{
+ bool progress;
+ NIR_PASS(_, nir, nir_lower_int64);
+ do {
+ progress = false;
+
+ NIR_PASS(progress, nir, nir_split_var_copies);
+ NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_function_temp);
+ NIR_PASS(progress, nir, nir_lower_var_copies);
+ NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
+ NIR_PASS(progress, nir, nir_opt_undef);
+ NIR_PASS(progress, nir, nir_opt_dce);
+ NIR_PASS(progress, nir, nir_opt_cse);
+ NIR_PASS(progress, nir, nir_opt_dead_cf);
+ NIR_PASS(progress, nir, nir_copy_prop);
+ NIR_PASS(progress, nir, nir_opt_deref);
+ NIR_PASS(progress, nir, nir_opt_constant_folding);
+ NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
+ NIR_PASS(progress, nir, nir_opt_dead_write_vars);
+ NIR_PASS(progress, nir, nir_opt_combine_stores, nir_var_all);
+ NIR_PASS(progress, nir, nir_remove_dead_variables, nir_var_function_temp,
+ NULL);
+ NIR_PASS(progress, nir, nir_opt_algebraic);
+ NIR_PASS(progress, nir, nir_opt_if, 0);
+ NIR_PASS(progress, nir, nir_opt_remove_phis);
+ NIR_PASS(progress, nir, nir_opt_loop);
+ NIR_PASS(progress, nir, nir_lower_pack);
+ NIR_PASS(progress, nir, nir_lower_alu_to_scalar, kk_scalarize_filter,
+ NULL);
+ } while (progress);
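+   /* Late lowering: scalarize constants and run the backend algebraic pass,
+    * then take the NIR out of SSA as promised in nir_to_msl.h. */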
+ NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
+ NIR_PASS(_, nir, msl_nir_lower_algebraic_late);
+ NIR_PASS(_, nir, nir_convert_from_ssa, true, false);
+ nir_trivialize_registers(nir);
+ NIR_PASS(_, nir, nir_copy_prop);
+
+ return progress;
+}
+
+static void
+msl_gather_info(struct nir_to_msl_ctx *ctx)
+{
+ nir_function_impl *impl = nir_shader_get_entrypoint(ctx->shader);
+ ctx->types = msl_infer_types(ctx->shader);
+
+   /* TODO_KOSMICKRISP
+    * Reindex blocks and SSA defs. This would enable optimizations we don't do
+    * at the moment. */
+ nir_index_blocks(impl);
+ nir_index_ssa_defs(impl);
+
+ if (ctx->shader->info.stage == MESA_SHADER_VERTEX ||
+ ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {
+ msl_gather_io_info(ctx, ctx->inputs_info, ctx->outputs_info);
+ }
+}
+
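+/* Declare every SSA destination up front so that values defined inside nested
+ * control flow remain in scope at their uses once the shader is out of SSA.
+ * Samplers are declared without the zero initializer used for other types. */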
+static void
+predeclare_ssa_values(struct nir_to_msl_ctx *ctx, nir_function_impl *impl)
+{
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr(instr, block) {
+ nir_def *def;
+ switch (instr->type) {
+ case nir_instr_type_alu: {
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ def = &alu->def;
+ break;
+ }
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (!instrinsic_needs_dest_type(intr))
+ continue;
+ def = &intr->def;
+ break;
+ }
+ case nir_instr_type_tex: {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ def = &tex->def;
+ break;
+ }
+ default:
+ continue;
+ }
+ const char *type = msl_type_for_def(ctx->types, def);
+ if (!type)
+ continue;
+         if (msl_def_is_sampler(ctx, def)) {
+            P_IND(ctx, "%s t%u;\n", type, def->index);
+         } else {
+            P_IND(ctx, "%s t%u = %s(0);\n", type, def->index, type);
+         }
+ }
+ }
+}
+
+char *
+nir_to_msl(nir_shader *shader, void *mem_ctx)
+{
+   /* Need to rename the entrypoint here because hardcoded shaders used by
+    * vk_meta don't go through the preprocess step, since we are the ones
+    * creating them.
+    */
+ rename_main_entrypoint(shader);
+
+ struct nir_to_msl_ctx ctx = {
+ .shader = shader,
+ .text = _mesa_string_buffer_create(mem_ctx, 1024),
+ };
+ nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+ msl_gather_info(&ctx);
+
+ P(&ctx, "// Generated by Mesa compiler\n");
+ if (shader->info.stage == MESA_SHADER_COMPUTE)
+ P(&ctx, "#include <metal_compute>\n");
+ P(&ctx, "#include <metal_stdlib>\n");
+ P(&ctx, "using namespace metal;\n");
+
+ msl_emit_io_blocks(&ctx, shader);
+ if (shader->info.stage == MESA_SHADER_FRAGMENT &&
+ shader->info.fs.early_fragment_tests)
+ P(&ctx, "[[early_fragment_tests]]\n");
+ P(&ctx, "%s %s %s(\n", get_stage_string(shader->info.stage),
+ output_type(shader), get_entrypoint_name(shader));
+ ctx.indentlevel++;
+ emit_sysvals(&ctx, shader);
+ emit_inputs(&ctx, shader);
+ ctx.indentlevel--;
+ P(&ctx, ")\n");
+ P(&ctx, "{\n");
+ ctx.indentlevel++;
+ msl_emit_output_var(&ctx, shader);
+ emit_local_vars(&ctx, shader);
+ predeclare_ssa_values(&ctx, impl);
+ foreach_list_typed(nir_cf_node, node, node, &impl->body) {
+ cf_node_to_metal(&ctx, node);
+ }
+ emit_output_return(&ctx, shader);
+ ctx.indentlevel--;
+ P(&ctx, "}\n");
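+   /* Hand the generated string over to mem_ctx; freeing the string buffer
+    * afterwards only releases the wrapper, not the stolen buf. */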
+ char *ret = ctx.text->buf;
+ ralloc_steal(mem_ctx, ctx.text->buf);
+ ralloc_free(ctx.text);
+ return ret;
+}
diff --git a/src/kosmickrisp/compiler/nir_to_msl.h b/src/kosmickrisp/compiler/nir_to_msl.h
new file mode 100644
index 00000000000..0a4d50109ed
--- /dev/null
+++ b/src/kosmickrisp/compiler/nir_to_msl.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#pragma once
+
+#include "nir.h"
+
+enum pipe_format;
+
+/* Assumes nir_shader_gather_info has been called beforehand. */
+char *nir_to_msl(nir_shader *shader, void *mem_ctx);
+
+/* Call this after all API-specific lowerings. It will bring the NIR out of SSA
+ * at the end */
+bool msl_optimize_nir(struct nir_shader *nir);
+
+/* Call this before all API-specific lowerings. */
+void msl_preprocess_nir(struct nir_shader *nir);
+
+enum msl_tex_access_flag {
+ MSL_ACCESS_SAMPLE = 0,
+ MSL_ACCESS_READ,
+ MSL_ACCESS_WRITE,
+ MSL_ACCESS_READ_WRITE,
+};
+
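+/* Map gl_access_qualifier bits onto the closest MSL texture access qualifier;
+ * with no restriction we conservatively fall back to read_write. */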
+static inline enum msl_tex_access_flag
+msl_convert_access_flag(enum gl_access_qualifier qual)
+{
+ if (qual & ACCESS_NON_WRITEABLE)
+ return MSL_ACCESS_READ;
+ if (qual & ACCESS_NON_READABLE)
+ return MSL_ACCESS_WRITE;
+ return MSL_ACCESS_READ_WRITE;
+}
+
+bool msl_nir_fs_force_output_signedness(
+ nir_shader *nir, enum pipe_format render_target_formats[MAX_DRAW_BUFFERS]);
+
+bool msl_nir_vs_remove_point_size_write(nir_builder *b,
+ nir_intrinsic_instr *intrin,
+ void *data);
+
+bool msl_nir_fs_remove_depth_write(nir_builder *b, nir_intrinsic_instr *intrin,
+ void *data);
+
+bool msl_lower_textures(nir_shader *s);
+
+bool msl_lower_static_sample_mask(nir_shader *nir, uint32_t sample_mask);
+bool msl_ensure_depth_write(nir_shader *nir);
+bool msl_ensure_vertex_position_output(nir_shader *nir);
+bool msl_nir_sample_mask_type(nir_shader *nir);
+bool msl_nir_layer_id_type(nir_shader *nir);
diff --git a/src/kosmickrisp/kosmicomp.c b/src/kosmickrisp/kosmicomp.c
new file mode 100644
index 00000000000..a45d8203c96
--- /dev/null
+++ b/src/kosmickrisp/kosmicomp.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "compiler/nir_to_msl.h"
+#include "spirv/nir_spirv.h"
+
+static int
+load_spirv(const char *filename, uint32_t **words, size_t *nwords)
+{
+ const size_t CHUNK_SIZE = 4096;
+ uint32_t buf[CHUNK_SIZE];
+   FILE *input = fopen(filename, "rb");
+ if (!input) {
+ fprintf(stderr, "Could not open file %s: %s\n", filename,
+ strerror(errno));
+ return -1;
+ }
+
+ *nwords = 0;
+ *words = malloc(CHUNK_SIZE * sizeof(buf[0]));
+ size_t read_size;
+ while (1) {
+ read_size = fread(buf, sizeof(buf[0]), CHUNK_SIZE, input);
+ if (read_size == 0)
+ break;
+ *words = realloc(*words, (*nwords + read_size) * sizeof(buf[0]));
+ memcpy(*words + *nwords, buf, sizeof(buf[0]) * read_size);
+ *nwords += read_size;
+   }
+
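+   /* 0x07230203 is the SPIR-V magic number. */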
+ if (*words[0] != 0x07230203) {
+ fprintf(stderr, "%s is not a SPIR-V file?\n", filename);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+debug_callback(void *priv, enum nir_spirv_debug_level debuglevel, size_t offset,
+ const char *message)
+{
+ fprintf(stderr, "<%d> at %ld %s\n", debuglevel, offset, message);
+}
+
+static int
+type_size_vec4(const struct glsl_type *type, bool bindless)
+{
+ return glsl_count_attribute_slots(type, false);
+}
+
+static void
+shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
+{
+ assert(glsl_type_is_vector_or_scalar(type));
+
+ uint32_t comp_size =
+ glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
+ unsigned length = glsl_get_vector_elements(type);
+ *size = comp_size * length, *align = comp_size;
+}
+
+static void
+optimize(nir_shader *nir)
+{
+ msl_preprocess_nir(nir);
+
+ NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
+ nir_address_format_32bit_offset);
+ NIR_PASS(_, nir, nir_lower_explicit_io,
+ nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo,
+ nir_address_format_64bit_global);
+ if (nir->info.stage == MESA_SHADER_COMPUTE) {
+ if (!nir->info.shared_memory_explicit_layout) {
+ /* There may be garbage in shared_size, but it's the job of
+ * nir_lower_vars_to_explicit_types to allocate it. We have to reset to
+ * avoid overallocation.
+ */
+ nir->info.shared_size = 0;
+
+ NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
+ shared_var_info);
+ }
+ NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
+ nir_address_format_32bit_offset);
+ }
+
+ NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
+ type_size_vec4, (nir_lower_io_options)0);
+
+ NIR_PASS(_, nir, nir_lower_variable_initializers, ~nir_var_function_temp);
+ NIR_PASS(_, nir, nir_remove_dead_variables,
+ nir_var_shader_in | nir_var_shader_out | nir_var_system_value,
+ NULL);
+ NIR_PASS(_, nir, nir_lower_io_vars_to_temporaries,
+ nir_shader_get_entrypoint(nir), true, false);
+ nir_lower_compute_system_values_options options = {
+ .has_base_global_invocation_id = 0,
+ };
+ NIR_PASS(_, nir, nir_lower_system_values);
+ NIR_PASS(_, nir, nir_lower_compute_system_values, &options);
+ NIR_PASS(_, nir, nir_lower_global_vars_to_local);
+ NIR_PASS(_, nir, nir_lower_load_const_to_scalar);
+
+ msl_optimize_nir(nir);
+}
+
+static mesa_shader_stage
+stage_from_filename(const char *filename)
+{
+ struct StageMapping {
+ char *name;
+ mesa_shader_stage stage;
+ };
+ struct StageMapping stage_mappings[] = {
+ {.name = ".frag.", .stage = MESA_SHADER_FRAGMENT},
+ {.name = ".vert.", .stage = MESA_SHADER_VERTEX},
+ {.name = ".comp.", .stage = MESA_SHADER_COMPUTE},
+ };
+ for (int i = 0; i < ARRAY_SIZE(stage_mappings); i++) {
+ if (strstr(filename, stage_mappings[i].name))
+ return stage_mappings[i].stage;
+ }
+ return MESA_SHADER_NONE;
+}
+
+int
+main(int argc, char **argv)
+{
+ if (argc != 2) {
+ fprintf(stderr, "Usage: kosmicomp filename.spv\n");
+ return 1;
+ }
+
+ // read file
+ size_t nwords = 0;
+ uint32_t *words = NULL;
+ int result = load_spirv(argv[1], &words, &nwords);
+ if (result == -1) {
+ return 2;
+ }
+
+ // run spirv_to_nir
+ struct spirv_to_nir_options options = {
+ .environment = NIR_SPIRV_VULKAN,
+ .debug =
+ {
+ .func = &debug_callback,
+ .private_data = NULL,
+ },
+ .ubo_addr_format = nir_address_format_64bit_global,
+ .ssbo_addr_format = nir_address_format_64bit_global,
+ .phys_ssbo_addr_format = nir_address_format_64bit_global,
+ };
+ glsl_type_singleton_init_or_ref();
+ struct nir_shader_compiler_options nir_options = {
+ .lower_fdph = 1,
+ };
+ mesa_shader_stage stage = stage_from_filename(argv[1]);
+ if (stage == MESA_SHADER_NONE) {
+ fprintf(stderr, "Couldn't guess shader stage from %s\n", argv[1]);
+ return 4;
+ }
+ nir_shader *shader = spirv_to_nir(words, nwords, NULL, 0, stage, "main",
+ &options, &nir_options);
+ if (!shader) {
+ fprintf(stderr, "Compilation failed!\n");
+ return 3;
+ }
+ // print nir
+ nir_print_shader(shader, stdout);
+ optimize(shader);
+ nir_print_shader(shader, stdout);
+
+ char *msl_text = nir_to_msl(shader, shader);
+
+ fputs(msl_text, stdout);
+
+ ralloc_free(msl_text);
+
+ return 0;
+}
diff --git a/src/kosmickrisp/meson.build b/src/kosmickrisp/meson.build
new file mode 100644
index 00000000000..fe54329e132
--- /dev/null
+++ b/src/kosmickrisp/meson.build
@@ -0,0 +1,16 @@
+# Copyright 2025 LunarG, Inc.
+# Copyright 2025 Google LLC
+# SPDX-License-Identifier: MIT
+
+subdir('bridge')
+subdir('compiler')
+subdir('util')
+subdir('vulkan')
+
+executable(
+ 'kosmicomp',
+ files('kosmicomp.c'),
+ dependencies : [idep_nir, idep_vtn, idep_vulkan_runtime_headers, idep_vulkan_util_headers],
+ link_with: [libkk],
+ link_args: ['-Wl,-undefined,dynamic_lookup'],
+)
diff --git a/src/kosmickrisp/util/kk_dispatch_trampolines_gen.py b/src/kosmickrisp/util/kk_dispatch_trampolines_gen.py
new file mode 100644
index 00000000000..468f5679ea1
--- /dev/null
+++ b/src/kosmickrisp/util/kk_dispatch_trampolines_gen.py
@@ -0,0 +1,195 @@
+# coding=utf-8
+COPYRIGHT = """\
+/*
+ * Copyright 2020 Intel Corporation
+ * Copyright 2025 LunarG, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+"""
+
+import argparse
+import os
+
+from mako.template import Template
+
+# Mesa-local imports must be declared in meson variable
+# '{file_without_suffix}_depend_files'.
+from vk_entrypoints import get_entrypoints_from_xml
+
+TEMPLATE_H = Template(COPYRIGHT + """\
+/* This file generated from ${filename}, don't edit directly. */
+
+#ifndef VK_DISPATCH_TRAMPOLINES_H
+#define VK_DISPATCH_TRAMPOLINES_H
+
+#include "vk_dispatch_table.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern struct vk_physical_device_dispatch_table kk_physical_device_trampolines;
+extern struct vk_device_dispatch_table kk_device_trampolines;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* VK_DISPATCH_TRAMPOLINES_H */
+""")
+
+TEMPLATE_C = Template(COPYRIGHT + """\
+/* This file generated from ${filename}, don't edit directly. */
+
+#include "kk_device.h"
+#include "kk_dispatch_trampolines.h"
+#include "vk_object.h"
+#include "vk_physical_device.h"
+
+% for e in entrypoints:
+ % if not e.is_physical_device_entrypoint() or e.alias:
+ <% continue %>
+ % endif
+ % if e.guard is not None:
+#ifdef ${e.guard}
+ % endif
+static VKAPI_ATTR ${e.return_type} VKAPI_CALL
+${e.prefixed_name('kk_tramp')}(${e.decl_params()})
+{
+ <% assert e.params[0].type == 'VkPhysicalDevice' %>
+ VK_FROM_HANDLE(vk_physical_device, vk_physical_device, ${e.params[0].name});
+ % if e.return_type == 'void':
+ vk_physical_device->dispatch_table.${e.name}(${e.call_params()});
+ % else:
+ return vk_physical_device->dispatch_table.${e.name}(${e.call_params()});
+ % endif
+}
+ % if e.guard is not None:
+#endif
+ % endif
+% endfor
+
+struct vk_physical_device_dispatch_table kk_physical_device_trampolines = {
+% for e in entrypoints:
+ % if not e.is_physical_device_entrypoint() or e.alias:
+ <% continue %>
+ % endif
+ % if e.guard is not None:
+#ifdef ${e.guard}
+ % endif
+ .${e.name} = ${e.prefixed_name('kk_tramp')},
+ % if e.guard is not None:
+#endif
+ % endif
+% endfor
+};
+
+% for e in entrypoints:
+ % if not e.is_device_entrypoint() or e.alias:
+ <% continue %>
+ % endif
+ % if e.guard is not None:
+#ifdef ${e.guard}
+ % endif
+static VKAPI_ATTR ${e.return_type} VKAPI_CALL
+${e.prefixed_name('kk_tramp')}(${e.decl_params()})
+{
+ % if e.params[0].type == 'VkDevice':
+ VK_FROM_HANDLE(kk_device, kk_device, ${e.params[0].name});
+ % if e.return_type == 'void':
+ kk_device->exposed_dispatch_table.${e.name}(${e.call_params()});
+ % else:
+ return kk_device->exposed_dispatch_table.${e.name}(${e.call_params()});
+ % endif
+ % elif e.params[0].type in ('VkCommandBuffer', 'VkQueue'):
+ struct vk_object_base *vk_object = (struct vk_object_base *)${e.params[0].name};
+ struct kk_device *kk_device = container_of(vk_object->device, struct kk_device, vk);
+ % if e.return_type == 'void':
+ kk_device->exposed_dispatch_table.${e.name}(${e.call_params()});
+ % else:
+ return kk_device->exposed_dispatch_table.${e.name}(${e.call_params()});
+ % endif
+ % else:
+ assert(!"Unhandled device child trampoline case: ${e.params[0].type}");
+ % endif
+}
+ % if e.guard is not None:
+#endif
+ % endif
+% endfor
+
+struct vk_device_dispatch_table kk_device_trampolines = {
+% for e in entrypoints:
+ % if not e.is_device_entrypoint() or e.alias:
+ <% continue %>
+ % endif
+ % if e.guard is not None:
+#ifdef ${e.guard}
+ % endif
+ .${e.name} = ${e.prefixed_name('kk_tramp')},
+ % if e.guard is not None:
+#endif
+ % endif
+% endfor
+};
+""")
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--out-c', help='Output C file.')
+ parser.add_argument('--out-h', help='Output H file.')
+ parser.add_argument('--beta', required=True, help='Enable beta extensions.')
+ parser.add_argument('--xml',
+ help='Vulkan API XML file.',
+ required=True,
+ action='append',
+ dest='xml_files')
+ args = parser.parse_args()
+
+ entrypoints = get_entrypoints_from_xml(args.xml_files, args.beta)
+
+    # Render the dispatch trampoline header/source from the templates above.
+ try:
+ if args.out_h:
+ with open(args.out_h, 'w', encoding='utf-8') as f:
+ f.write(TEMPLATE_H.render(entrypoints=entrypoints,
+ filename=os.path.basename(__file__)))
+ if args.out_c:
+ with open(args.out_c, 'w', encoding='utf-8') as f:
+ f.write(TEMPLATE_C.render(entrypoints=entrypoints,
+ filename=os.path.basename(__file__)))
+ except Exception:
+ # In the event there's an error, this imports some helpers from mako
+ # to print a useful stack trace and prints it, then exits with
+ # status 1, if python is run with debug; otherwise it just raises
+ # the exception
+ if __debug__:
+ import sys
+ from mako import exceptions
+ sys.stderr.write(exceptions.text_error_template().render() + '\n')
+ sys.exit(1)
+ raise
+
+
+if __name__ == '__main__':
+ main()
diff --git a/src/kosmickrisp/util/meson.build b/src/kosmickrisp/util/meson.build
new file mode 100644
index 00000000000..43316831cec
--- /dev/null
+++ b/src/kosmickrisp/util/meson.build
@@ -0,0 +1,16 @@
+# Copyright © 2025 LunarG, Inc
+# SPDX-License-Identifier: MIT
+
+kk_dispatch_trampolines_gen = files('kk_dispatch_trampolines_gen.py')
+
+kk_dispatch_trampolines = custom_target(
+ 'kk_dispatch_trampolines',
+ input : [kk_dispatch_trampolines_gen, vk_api_xml],
+ output : ['kk_dispatch_trampolines.c', 'kk_dispatch_trampolines.h'],
+ command : [
+ prog_python, '@INPUT0@', '--xml', '@INPUT1@',
+ '--out-c', '@OUTPUT0@', '--out-h', '@OUTPUT1@',
+ '--beta', with_vulkan_beta.to_string()
+ ],
+ depend_files : vk_dispatch_trampolines_gen_depend_files,
+)
diff --git a/src/kosmickrisp/util/vk_entrypoints.py b/src/kosmickrisp/util/vk_entrypoints.py
new file mode 100644
index 00000000000..a8280bae2ff
--- /dev/null
+++ b/src/kosmickrisp/util/vk_entrypoints.py
@@ -0,0 +1,147 @@
+# Copyright 2020 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sub license, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice (including the
+# next paragraph) shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+import xml.etree.ElementTree as et
+
+from collections import OrderedDict, namedtuple
+
+# Mesa-local imports must be declared in meson variable
+# '{file_without_suffix}_depend_files'.
+from vk_extensions import get_all_required, filter_api
+
+EntrypointParam = namedtuple('EntrypointParam', 'type name decl len')
+
+class EntrypointBase:
+ def __init__(self, name):
+ assert name.startswith('vk')
+ self.name = name[2:]
+ self.alias = None
+ self.guard = None
+ self.entry_table_index = None
+ # Extensions which require this entrypoint
+ self.core_version = None
+ self.extensions = []
+
+ def prefixed_name(self, prefix):
+ return prefix + '_' + self.name
+
+class Entrypoint(EntrypointBase):
+ def __init__(self, name, return_type, params):
+ super(Entrypoint, self).__init__(name)
+ self.return_type = return_type
+ self.params = params
+ self.guard = None
+ self.aliases = []
+ self.disp_table_index = None
+
+ def is_physical_device_entrypoint(self):
+ return self.params[0].type in ('VkPhysicalDevice', )
+
+ def is_device_entrypoint(self):
+ return self.params[0].type in ('VkDevice', 'VkCommandBuffer', 'VkQueue')
+
+ def decl_params(self, start=0):
+ return ', '.join(p.decl for p in self.params[start:])
+
+ def call_params(self, start=0):
+ return ', '.join(p.name for p in self.params[start:])
+
+class EntrypointAlias(EntrypointBase):
+ def __init__(self, name, entrypoint):
+ super(EntrypointAlias, self).__init__(name)
+ self.alias = entrypoint
+ entrypoint.aliases.append(self)
+
+ def is_physical_device_entrypoint(self):
+ return self.alias.is_physical_device_entrypoint()
+
+ def is_device_entrypoint(self):
+ return self.alias.is_device_entrypoint()
+
+ def prefixed_name(self, prefix):
+ return self.alias.prefixed_name(prefix)
+
+ @property
+ def params(self):
+ return self.alias.params
+
+ @property
+ def return_type(self):
+ return self.alias.return_type
+
+ @property
+ def disp_table_index(self):
+ return self.alias.disp_table_index
+
+ def decl_params(self):
+ return self.alias.decl_params()
+
+ def call_params(self):
+ return self.alias.call_params()
+
+def get_entrypoints(doc, api, beta):
+ """Extract the entry points from the registry."""
+ entrypoints = OrderedDict()
+
+ required = get_all_required(doc, 'command', api, beta)
+
+ for command in doc.findall('./commands/command'):
+ if not filter_api(command, api):
+ continue
+
+ if 'alias' in command.attrib:
+ name = command.attrib['name']
+ target = command.attrib['alias']
+ e = EntrypointAlias(name, entrypoints[target])
+ else:
+ name = command.find('./proto/name').text
+ ret_type = command.find('./proto/type').text
+ params = [EntrypointParam(
+ type=p.find('./type').text,
+ name=p.find('./name').text,
+ decl=''.join(p.itertext()),
+ len=p.attrib.get('altlen', p.attrib.get('len', None))
+ ) for p in command.findall('./param') if filter_api(p, api)]
+ # They really need to be unique
+ e = Entrypoint(name, ret_type, params)
+
+ if name not in required:
+ continue
+
+ r = required[name]
+ e.core_version = r.core_version
+ e.extensions = r.extensions
+ e.guard = r.guard
+
+ assert name not in entrypoints, name
+ entrypoints[name] = e
+
+ return entrypoints.values()
+
+def get_entrypoints_from_xml(xml_files, beta, api='vulkan'):
+ entrypoints = []
+
+ for filename in xml_files:
+ doc = et.parse(filename)
+ entrypoints += get_entrypoints(doc, api, beta)
+
+ return entrypoints
diff --git a/src/kosmickrisp/util/vk_extensions.py b/src/kosmickrisp/util/vk_extensions.py
new file mode 100644
index 00000000000..f58b04f71cb
--- /dev/null
+++ b/src/kosmickrisp/util/vk_extensions.py
@@ -0,0 +1,371 @@
+import copy
+import re
+import xml.etree.ElementTree as et
+
+def get_api_list(s):
+ apis = []
+ for a in s.split(','):
+ if a == 'disabled':
+ continue
+ assert a in ('vulkan', 'vulkansc')
+ apis.append(a)
+ return apis
+
+class Extension:
+ def __init__(self, name, number, ext_version):
+ self.name = name
+ self.type = None
+ self.number = number
+ self.platform = None
+ self.provisional = False
+ self.ext_version = int(ext_version)
+ self.supported = []
+
+ def from_xml(ext_elem):
+ name = ext_elem.attrib['name']
+ number = int(ext_elem.attrib['number'])
+ supported = get_api_list(ext_elem.attrib['supported'])
+ if name == 'VK_ANDROID_native_buffer':
+ assert not supported
+ supported = ['vulkan']
+
+ if not supported:
+ return Extension(name, number, 0)
+
+ version = None
+ for enum_elem in ext_elem.findall('.require/enum'):
+ if enum_elem.attrib['name'].endswith('_SPEC_VERSION'):
+ # Skip alias SPEC_VERSIONs
+ if 'value' in enum_elem.attrib:
+ assert version is None
+ version = int(enum_elem.attrib['value'])
+
+ assert version is not None
+ ext = Extension(name, number, version)
+ ext.type = ext_elem.attrib['type']
+ ext.platform = ext_elem.attrib.get('platform', None)
+ ext.provisional = ext_elem.attrib.get('provisional', False)
+ ext.supported = supported
+
+ return ext
+
+ def c_android_condition(self):
+ # if it's an EXT or vendor extension, it's allowed
+ if not self.name.startswith(ANDROID_EXTENSION_WHITELIST_PREFIXES):
+ return 'true'
+
+ allowed_version = ALLOWED_ANDROID_VERSION.get(self.name, None)
+ if allowed_version is None:
+ return 'false'
+
+ return 'ANDROID_API_LEVEL >= %d' % (allowed_version)
+
+class ApiVersion:
+ def __init__(self, version):
+ self.version = version
+
+class VkVersion:
+ def __init__(self, string):
+ split = string.split('.')
+ self.major = int(split[0])
+ self.minor = int(split[1])
+ if len(split) > 2:
+ assert len(split) == 3
+ self.patch = int(split[2])
+ else:
+ self.patch = None
+
+ # Sanity check. The range bits are required by the definition of the
+ # VK_MAKE_VERSION macro
+ assert self.major < 1024 and self.minor < 1024
+ assert self.patch is None or self.patch < 4096
+ assert str(self) == string
+
+ def __str__(self):
+ ver_list = [str(self.major), str(self.minor)]
+ if self.patch is not None:
+ ver_list.append(str(self.patch))
+ return '.'.join(ver_list)
+
+ def c_vk_version(self):
+ ver_list = [str(self.major), str(self.minor), str(self.patch or 0)]
+ return 'VK_MAKE_VERSION(' + ', '.join(ver_list) + ')'
+
+ def __int_ver(self):
+ # This is just an expansion of VK_VERSION
+ return (self.major << 22) | (self.minor << 12) | (self.patch or 0)
+
+ def __gt__(self, other):
+ # If only one of them has a patch version, "ignore" it by making
+ # other's patch version match self.
+ if (self.patch is None) != (other.patch is None):
+ other = copy.copy(other)
+ other.patch = self.patch
+
+ return self.__int_ver() > other.__int_ver()
+
+# Sort the extension list the way we expect: KHR, then EXT, then vendors
+# alphabetically. For digits, read them as a whole number and sort by that.
+# eg.: VK_KHR_8bit_storage < VK_KHR_16bit_storage < VK_EXT_acquire_xlib_display
+def extension_order(ext):
+ order = []
+ for substring in re.split('(KHR|EXT|[0-9]+)', ext.name):
+ if substring == 'KHR':
+ order.append(1)
+ if substring == 'EXT':
+ order.append(2)
+ elif substring.isdigit():
+ order.append(int(substring))
+ else:
+ order.append(substring)
+ return order
+
+def get_all_exts_from_xml(xml, api='vulkan'):
+ """ Get a list of all Vulkan extensions. """
+
+ xml = et.parse(xml)
+
+ extensions = []
+ for ext_elem in xml.findall('.extensions/extension'):
+ ext = Extension.from_xml(ext_elem)
+ if api in ext.supported:
+ extensions.append(ext)
+
+ return sorted(extensions, key=extension_order)
+
+def init_exts_from_xml(xml, extensions, platform_defines):
+ """ Walk the Vulkan XML and fill out extra extension information. """
+
+ xml = et.parse(xml)
+
+ ext_name_map = {}
+ for ext in extensions:
+ ext_name_map[ext.name] = ext
+
+ # KHR_display is missing from the list.
+ platform_defines.append('VK_USE_PLATFORM_DISPLAY_KHR')
+ for platform in xml.findall('./platforms/platform'):
+ platform_defines.append(platform.attrib['protect'])
+
+ for ext_elem in xml.findall('.extensions/extension'):
+ ext_name = ext_elem.attrib['name']
+ if ext_name not in ext_name_map:
+ continue
+
+ ext = ext_name_map[ext_name]
+ ext.type = ext_elem.attrib['type']
+
+class Requirements:
+ def __init__(self, core_version=None):
+ self.core_version = core_version
+ self.extensions = []
+ self.guard = None
+
+ def add_extension(self, ext):
+ for e in self.extensions:
+ if e == ext:
+                return
+ assert e.name != ext.name
+
+ self.extensions.append(ext)
+
+def filter_api(elem, api):
+ if 'api' not in elem.attrib:
+ return True
+
+ return api in elem.attrib['api'].split(',')
+
+def get_alias(aliases, name):
+ if name in aliases:
+ # in case the spec registry adds an alias chain later
+ return get_alias(aliases, aliases[name])
+ return name
+
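+# Build a map from entity name (command, struct, ...) to the core version
+# and/or extensions that require it, honoring per-API filtering and the
+# beta/provisional setting.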
+def get_all_required(xml, thing, api, beta):
+ things = {}
+ aliases = {}
+ for struct in xml.findall('./types/type[@category="struct"][@alias]'):
+ if not filter_api(struct, api):
+ continue
+
+ name = struct.attrib['name']
+ alias = struct.attrib['alias']
+ aliases[name] = alias
+
+ for feature in xml.findall('./feature'):
+ if not filter_api(feature, api):
+ continue
+
+ version = VkVersion(feature.attrib['number'])
+ for t in feature.findall('./require/' + thing):
+ name = t.attrib['name']
+ assert name not in things
+ things[name] = Requirements(core_version=version)
+
+ for extension in xml.findall('.extensions/extension'):
+ ext = Extension.from_xml(extension)
+ if api not in ext.supported:
+ continue
+
+ if beta != 'true' and ext.provisional:
+ continue
+
+ for require in extension.findall('./require'):
+ if not filter_api(require, api):
+ continue
+
+ for t in require.findall('./' + thing):
+ name = get_alias(aliases, t.attrib['name'])
+ r = things.setdefault(name, Requirements())
+ r.add_extension(ext)
+
+ platform_defines = {}
+ for platform in xml.findall('./platforms/platform'):
+ name = platform.attrib['name']
+ define = platform.attrib['protect']
+ platform_defines[name] = define
+
+ for req in things.values():
+ if req.core_version is not None:
+ continue
+
+ for ext in req.extensions:
+ if ext.platform in platform_defines:
+ req.guard = platform_defines[ext.platform]
+ break
+
+ return things
+
+# Mapping between extension name and the android version in which the extension
+# was whitelisted in Android CTS's dEQP-VK.info.device_extensions and
+# dEQP-VK.api.info.android.no_unknown_extensions, excluding those blocked by
+# android.graphics.cts.VulkanFeaturesTest#testVulkanBlockedExtensions.
+ALLOWED_ANDROID_VERSION = {
+ # checkInstanceExtensions on oreo-cts-release
+ "VK_KHR_surface": 26,
+ "VK_KHR_display": 26,
+ "VK_KHR_android_surface": 26,
+ "VK_KHR_mir_surface": 26,
+ "VK_KHR_wayland_surface": 26,
+ "VK_KHR_win32_surface": 26,
+ "VK_KHR_xcb_surface": 26,
+ "VK_KHR_xlib_surface": 26,
+ "VK_KHR_get_physical_device_properties2": 26,
+ "VK_KHR_get_surface_capabilities2": 26,
+ "VK_KHR_external_memory_capabilities": 26,
+ "VK_KHR_external_semaphore_capabilities": 26,
+ "VK_KHR_external_fence_capabilities": 26,
+ # on pie-cts-release
+ "VK_KHR_device_group_creation": 28,
+ "VK_KHR_get_display_properties2": 28,
+ # on android10-tests-release
+ "VK_KHR_surface_protected_capabilities": 29,
+ # on android13-tests-release
+ "VK_KHR_portability_enumeration": 33,
+
+ # checkDeviceExtensions on oreo-cts-release
+ "VK_KHR_swapchain": 26,
+ "VK_KHR_display_swapchain": 26,
+ "VK_KHR_sampler_mirror_clamp_to_edge": 26,
+ "VK_KHR_shader_draw_parameters": 26,
+ "VK_KHR_maintenance1": 26,
+ "VK_KHR_push_descriptor": 26,
+ "VK_KHR_descriptor_update_template": 26,
+ "VK_KHR_incremental_present": 26,
+ "VK_KHR_shared_presentable_image": 26,
+ "VK_KHR_storage_buffer_storage_class": 26,
+ "VK_KHR_16bit_storage": 26,
+ "VK_KHR_get_memory_requirements2": 26,
+ "VK_KHR_external_memory": 26,
+ "VK_KHR_external_memory_fd": 26,
+ "VK_KHR_external_memory_win32": 26,
+ "VK_KHR_external_semaphore": 26,
+ "VK_KHR_external_semaphore_fd": 26,
+ "VK_KHR_external_semaphore_win32": 26,
+ "VK_KHR_external_fence": 26,
+ "VK_KHR_external_fence_fd": 26,
+ "VK_KHR_external_fence_win32": 26,
+ "VK_KHR_win32_keyed_mutex": 26,
+ "VK_KHR_dedicated_allocation": 26,
+ "VK_KHR_variable_pointers": 26,
+ "VK_KHR_relaxed_block_layout": 26,
+ "VK_KHR_bind_memory2": 26,
+ "VK_KHR_maintenance2": 26,
+ "VK_KHR_image_format_list": 26,
+ "VK_KHR_sampler_ycbcr_conversion": 26,
+ # on oreo-mr1-cts-release
+ "VK_KHR_draw_indirect_count": 27,
+ # on pie-cts-release
+ "VK_KHR_device_group": 28,
+ "VK_KHR_multiview": 28,
+ "VK_KHR_maintenance3": 28,
+ "VK_KHR_create_renderpass2": 28,
+ "VK_KHR_driver_properties": 28,
+ # on android10-tests-release
+ "VK_KHR_shader_float_controls": 29,
+ "VK_KHR_shader_float16_int8": 29,
+ "VK_KHR_8bit_storage": 29,
+ "VK_KHR_depth_stencil_resolve": 29,
+ "VK_KHR_swapchain_mutable_format": 29,
+ "VK_KHR_shader_atomic_int64": 29,
+ "VK_KHR_vulkan_memory_model": 29,
+ "VK_KHR_swapchain_mutable_format": 29,
+ "VK_KHR_uniform_buffer_standard_layout": 29,
+ # on android11-tests-release
+ "VK_KHR_imageless_framebuffer": 30,
+ "VK_KHR_shader_subgroup_extended_types": 30,
+ "VK_KHR_buffer_device_address": 30,
+ "VK_KHR_separate_depth_stencil_layouts": 30,
+ "VK_KHR_timeline_semaphore": 30,
+ "VK_KHR_spirv_1_4": 30,
+ "VK_KHR_pipeline_executable_properties": 30,
+ "VK_KHR_shader_clock": 30,
+ # blocked by testVulkanBlockedExtensions
+ # "VK_KHR_performance_query": 30,
+ "VK_KHR_shader_non_semantic_info": 30,
+ "VK_KHR_copy_commands2": 30,
+ # on android12-tests-release
+ "VK_KHR_shader_terminate_invocation": 31,
+ "VK_KHR_ray_tracing_pipeline": 31,
+ "VK_KHR_ray_query": 31,
+ "VK_KHR_acceleration_structure": 31,
+ "VK_KHR_pipeline_library": 31,
+ "VK_KHR_deferred_host_operations": 31,
+ "VK_KHR_fragment_shading_rate": 31,
+ "VK_KHR_zero_initialize_workgroup_memory": 31,
+ "VK_KHR_workgroup_memory_explicit_layout": 31,
+ "VK_KHR_synchronization2": 31,
+ "VK_KHR_shader_integer_dot_product": 31,
+ # on android13-tests-release
+ "VK_KHR_dynamic_rendering": 33,
+ "VK_KHR_format_feature_flags2": 33,
+ "VK_KHR_global_priority": 33,
+ "VK_KHR_maintenance4": 33,
+ "VK_KHR_portability_subset": 33,
+ "VK_KHR_present_id": 33,
+ "VK_KHR_present_wait": 33,
+ "VK_KHR_shader_subgroup_uniform_control_flow": 33,
+
+ # testNoUnknownExtensions on oreo-cts-release
+ "VK_GOOGLE_display_timing": 26,
+ # on pie-cts-release
+ "VK_ANDROID_external_memory_android_hardware_buffer": 28,
+ # on android11-tests-release
+ "VK_GOOGLE_decorate_string": 30,
+ "VK_GOOGLE_hlsl_functionality1": 30,
+ # on android13-tests-release
+ "VK_GOOGLE_surfaceless_query": 33,
+
+ # this HAL extension is always allowed and will be filtered out by the
+ # loader
+ "VK_ANDROID_native_buffer": 26,
+}
+
+# Extensions with these prefixes are checked in Android CTS, and thus must be
+# whitelisted per the preceding dict.
+ANDROID_EXTENSION_WHITELIST_PREFIXES = (
+ "VK_KHX",
+ "VK_KHR",
+ "VK_GOOGLE",
+ "VK_ANDROID"
+)
diff --git a/src/kosmickrisp/vulkan/cl/kk_query.cl b/src/kosmickrisp/vulkan/cl/kk_query.cl
new file mode 100644
index 00000000000..ef24ab8ab7b
--- /dev/null
+++ b/src/kosmickrisp/vulkan/cl/kk_query.cl
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright © 2024 Alyssa Rosenzweig
+ * Copyright © 2024 Valve Corporation
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+#include "compiler/libcl/libcl_vk.h"
+
+#include "kk_query.h"
+
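+/* One dispatch group per entry: each group writes its 64-bit value to the
+ * address stored in its write_array slot. */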
+void
+libkk_write_u64(global struct libkk_imm_write *write_array)
+{
+ *write_array[cl_group_id.x].address = write_array[cl_group_id.x].value;
+}
+
+void
+libkk_copy_queries(global uint64_t *availability, global uint64_t *results,
+ global uint16_t *oq_index, uint64_t dst_addr,
+ uint64_t dst_stride, uint32_t first_query,
+ VkQueryResultFlagBits flags, uint16_t reports_per_query)
+{
+ uint index = cl_group_id.x;
+ uint64_t dst = dst_addr + (((uint64_t)index) * dst_stride);
+ uint32_t query = first_query + index;
+
+ bool available;
+ if (availability)
+ available = availability[query];
+ else
+ available = (results[query] != LIBKK_QUERY_UNAVAILABLE);
+
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
+ /* For occlusion queries, results[] points to the device global heap. We
+ * need to remap indices according to the query pool's allocation.
+ */
+ uint result_index = oq_index ? oq_index[query] : query;
+ uint idx = result_index * reports_per_query;
+
+ for (unsigned i = 0; i < reports_per_query; ++i) {
+ vk_write_query(dst, i, flags, results[idx + i]);
+ }
+ }
+
+ if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+ vk_write_query(dst, reports_per_query, flags, available);
+ }
+}
diff --git a/src/kosmickrisp/vulkan/cl/kk_query.h b/src/kosmickrisp/vulkan/cl/kk_query.h
new file mode 100644
index 00000000000..8b37d36bb74
--- /dev/null
+++ b/src/kosmickrisp/vulkan/cl/kk_query.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright © 2024 Alyssa Rosenzweig
+ * Copyright © 2024 Valve Corporation
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef KK_QUERY_H
+#define KK_QUERY_H
+
+#include "compiler/libcl/libcl.h"
+
+struct libkk_imm_write {
+ DEVICE(uint64_t) address;
+ uint64_t value;
+};
+
+#define LIBKK_QUERY_UNAVAILABLE (uint64_t)((int64_t)-1)
+
+#endif /* KK_QUERY_H */
diff --git a/src/kosmickrisp/vulkan/cl/kk_triangle_fan.cl b/src/kosmickrisp/vulkan/cl/kk_triangle_fan.cl
new file mode 100644
index 00000000000..bc2e250d072
--- /dev/null
+++ b/src/kosmickrisp/vulkan/cl/kk_triangle_fan.cl
@@ -0,0 +1,283 @@
+/*
+ * Copyright 2023 Alyssa Rosenzweig
+ * Copyright 2023 Valve Corporation
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+#include "compiler/libcl/libcl_vk.h"
+#include "compiler/shader_enums.h"
+
+static uint
+libkk_vertex_id_for_line_loop(uint prim, uint vert, uint num_prims)
+{
+ /* (0, 1), (1, 2), (2, 0) */
+ if (prim == (num_prims - 1) && vert == 1)
+ return 0;
+ else
+ return prim + vert;
+}
+
+/* Swap the two non-provoking vertices in odd triangles. This generates a
+ * vertex ID list with a consistent winding order.
+ *
+ * With prim and flatshade_first, the map : [0, 1, 2] -> [0, 1, 2] is its own
+ * inverse. This lets us reuse it for both vertex fetch and transform feedback.
+ */
+static uint
+libagx_map_vertex_in_tri_strip(uint prim, uint vert, bool flatshade_first)
+{
+ unsigned pv = flatshade_first ? 0 : 2;
+
+ bool even = (prim & 1) == 0;
+ bool provoking = vert == pv;
+
+ return (provoking || even) ? vert : ((3 - pv) - vert);
+}
+
+static uint
+libkk_vertex_id_for_tri_fan(uint prim, uint vert, bool flatshade_first)
+{
+ /* Vulkan spec section 20.1.7 gives (i + 1, i + 2, 0) for a provoking
+ * first. OpenGL instead wants (0, i + 1, i + 2) with a provoking last.
+ * Piglit clipflat expects us to switch between these orders depending on
+ * provoking vertex, to avoid trivializing the fan.
+ *
+ * Rotate accordingly.
+ */
+ if (flatshade_first) {
+ vert = (vert == 2) ? 0 : (vert + 1);
+ }
+
+ /* The simpler form assuming last is provoking. */
+ return (vert == 0) ? 0 : prim + vert;
+}
+
+static uint
+libkk_vertex_id_for_tri_strip_adj(uint prim, uint vert, uint num_prims,
+ bool flatshade_first)
+{
+   /* See Vulkan spec section 20.1.11 "Triangle Strips With Adjacency".
+ *
+ * There are different cases for first/middle/last/only primitives and for
+ * odd/even primitives. Determine which case we're in.
+ */
+ bool last = prim == (num_prims - 1);
+ bool first = prim == 0;
+ bool even = (prim & 1) == 0;
+ bool even_or_first = even || first;
+
+ /* When the last vertex is provoking, we rotate the primitives
+ * accordingly. This seems required for OpenGL.
+ */
+ if (!flatshade_first && !even_or_first) {
+ vert = (vert + 4u) % 6u;
+ }
+
+ /* Offsets per the spec. The spec lists 6 cases with 6 offsets. Luckily,
+ * there are lots of patterns we can exploit, avoiding a full 6x6 LUT.
+ *
+ * Here we assume the first vertex is provoking, the Vulkan default.
+ */
+ uint offsets[6] = {
+ 0,
+ first ? 1 : (even ? -2 : 3),
+ even_or_first ? 2 : 4,
+ last ? 5 : 6,
+ even_or_first ? 4 : 2,
+ even_or_first ? 3 : -2,
+ };
+
+ /* Ensure NIR can see thru the local array */
+ uint offset = 0;
+ for (uint i = 1; i < 6; ++i) {
+ if (i == vert)
+ offset = offsets[i];
+ }
+
+ /* Finally add to the base of the primitive */
+ return (prim * 2) + offset;
+}
+
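+/* Translate (primitive index, vertex within primitive) into a flat vertex ID
+ * for the given topology, honoring the provoking-vertex convention. */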
+static uint
+vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first, uint prim,
+ uint vert, uint num_prims)
+{
+ switch (mode) {
+ case MESA_PRIM_POINTS:
+ case MESA_PRIM_LINES:
+ case MESA_PRIM_TRIANGLES:
+ case MESA_PRIM_LINES_ADJACENCY:
+ case MESA_PRIM_TRIANGLES_ADJACENCY:
+ /* Regular primitive: every N vertices defines a primitive */
+ return (prim * mesa_vertices_per_prim(mode)) + vert;
+
+ case MESA_PRIM_LINE_LOOP:
+ return libkk_vertex_id_for_line_loop(prim, vert, num_prims);
+
+ case MESA_PRIM_LINE_STRIP:
+ case MESA_PRIM_LINE_STRIP_ADJACENCY:
+ /* (i, i + 1) or (i, ..., i + 3) */
+ return prim + vert;
+
+ case MESA_PRIM_TRIANGLE_STRIP: {
+ /* Order depends on the provoking vert.
+ *
+ * First: (0, 1, 2), (1, 3, 2), (2, 3, 4).
+ * Last: (0, 1, 2), (2, 1, 3), (2, 3, 4).
+ *
+ * Pull the (maybe swapped) vert from the corresponding primitive
+ */
+ return prim + libagx_map_vertex_in_tri_strip(prim, vert, flatshade_first);
+ }
+
+ case MESA_PRIM_TRIANGLE_FAN:
+ return libkk_vertex_id_for_tri_fan(prim, vert, flatshade_first);
+
+ case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY:
+ return libkk_vertex_id_for_tri_strip_adj(prim, vert, num_prims,
+ flatshade_first);
+
+ default:
+ return 0;
+ }
+}
+
+static void
+store_index(global uint8_t *index_buffer, uint index_size_B, uint id,
+ uint value)
+{
+ global uint32_t *out_32 = (global uint32_t *)index_buffer;
+ global uint16_t *out_16 = (global uint16_t *)index_buffer;
+ global uint8_t *out_8 = (global uint8_t *)index_buffer;
+
+ if (index_size_B == 4)
+ out_32[id] = value;
+ else if (index_size_B == 2)
+ out_16[id] = value;
+ else
+ out_8[id] = value;
+}
+
+static uint
+load_index(constant uint8_t *index_buffer, uint32_t index_buffer_range_el,
+ uint id, uint index_size)
+{
+ /* With no index buffer, the index is just the id */
+ if (index_buffer == 0u)
+ return id;
+
+ /* When no index_buffer is present, index_buffer_range_el is vtx count */
+ bool oob = id >= index_buffer_range_el;
+
+ /* If the load would be out-of-bounds, load the first element, which is
+ * assumed valid. If the application index buffer is empty with robustness2,
+ * index_buffer will point to a zero sink where only the first element is
+ * valid.
+ */
+ if (oob) {
+ id = 0u;
+ }
+
+ uint el;
+ if (index_size == 1) {
+ el = ((constant uint8_t *)index_buffer)[id];
+ } else if (index_size == 2) {
+ el = ((constant uint16_t *)index_buffer)[id];
+ } else {
+ el = ((constant uint32_t *)index_buffer)[id];
+ }
+
+ /* D3D robustness semantics. TODO: Optimize? */
+ if (oob) {
+ el = 0;
+ }
+
+ return el;
+}
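+
+/* Illustration: with index_buffer_range_el = 10, an out-of-bounds id such as
+ * 100 first loads element 0 (assumed valid per the comment above) and then
+ * forces the result to 0, giving the D3D robustness behaviour.
+ */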
+
+/*
+ * Return the ID of the first thread in the workgroup where cond is true, or
+ * 1024 if cond is false across the workgroup.
+ */
+static uint
+first_true_thread_in_workgroup(bool cond, local uint *scratch)
+{
+ barrier(CLK_LOCAL_MEM_FENCE);
+ scratch[get_sub_group_id()] = sub_group_ballot(cond)[0];
+ barrier(CLK_LOCAL_MEM_FENCE);
+
+ uint first_group =
+ ctz(sub_group_ballot(scratch[get_sub_group_local_id()])[0]);
+ uint off = ctz(first_group < 32 ? scratch[first_group] : 0);
+ return (first_group * 32) + off;
+}
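+
+/* Intended usage (sketch, based on the barriers and the 32x32 layout above):
+ * callers run in a 1024-thread workgroup and pass a `local uint scratch[32]`
+ * array, one slot per 32-wide subgroup; every thread must reach the call
+ * because it contains workgroup barriers. The commented-out scratch array in
+ * the kernel below is the matching allocation.
+ */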
+
+// TODO_KOSMICKRISP
+// KERNEL(1024)
+void
+libkk_unroll_geometry_and_restart(
+ constant uint8_t *index_buffer, global uint8_t *out_ptr,
+ constant uint32_t *in_draw, global uint32_t *out_draw,
+ uint32_t restart_index, uint32_t index_buffer_size_el, uint32_t in_el_size_B,
+ uint32_t out_el_size_B, uint32_t flatshade_first, uint32_t mode)
+{
+ uint tid = cl_local_id.x;
+ uint count = in_draw[0];
+
+ constant uint8_t *in_ptr =
+ index_buffer ? index_buffer + (in_draw[2] * in_el_size_B) : index_buffer;
+
+ // local uint scratch[32];
+
+ uint out_prims = 0;
+ uint needle = 0;
+ uint per_prim = mesa_vertices_per_prim(mode);
+ while (needle < count) {
+ /* Search for next restart or the end. Lanes load in parallel. */
+ uint next_restart = needle;
+ for (;;) {
+ uint idx = next_restart + tid;
+ bool restart =
+ idx >= count || load_index(in_ptr, index_buffer_size_el, idx,
+ in_el_size_B) == restart_index;
+
+ // uint next_offs = first_true_thread_in_workgroup(restart, scratch);
+
+ // next_restart += next_offs;
+ // if (next_offs < 1024)
+ // break;
+ if (restart)
+ break;
+ next_restart++;
+ }
+
+ /* Emit up to the next restart. Lanes output in parallel */
+ uint subcount = next_restart - needle;
+ uint subprims = u_decomposed_prims_for_vertices(mode, subcount);
+ uint out_prims_base = out_prims;
+ for (uint i = tid; i < subprims; /*i += 1024*/ ++i) {
+ for (uint vtx = 0; vtx < per_prim; ++vtx) {
+ uint id =
+ vertex_id_for_topology(mode, flatshade_first, i, vtx, subprims);
+ uint offset = needle + id;
+
+ uint x = ((out_prims_base + i) * per_prim) + vtx;
+ uint y =
+ load_index(in_ptr, index_buffer_size_el, offset, in_el_size_B);
+
+ store_index(out_ptr, out_el_size_B, x, y);
+ }
+ }
+
+ out_prims += subprims;
+ needle = next_restart + 1;
+ }
+
+ if (tid == 0) {
+ out_draw[0] = out_prims * per_prim; /* indexCount */
+ out_draw[1] = in_draw[1]; /* instanceCount */
+ out_draw[2] = 0u; /* firstIndex */
+ out_draw[3] = index_buffer ? in_draw[3] : in_draw[2]; /* vertexOffset */
+ out_draw[4] = index_buffer ? in_draw[4] : in_draw[3]; /* firstInstance */
+ }
+}
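+
+/* Example (illustrative trace, assuming the kernel currently runs with a
+ * single thread since the 1024-wide parallel paths are commented out): for
+ * mode = MESA_PRIM_TRIANGLE_STRIP, flatshade_first = 0, restart_index =
+ * 0xFFFF and input indices {0, 1, 2, 3, 0xFFFF, 4, 5, 6}, the two sub-strips
+ * unroll to the triangle list 0,1,2  2,1,3  4,5,6 and out_draw[0] becomes 9.
+ */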
diff --git a/src/kosmickrisp/vulkan/kk_bo.c b/src/kosmickrisp/vulkan/kk_bo.c
new file mode 100644
index 00000000000..708b93fb2ed
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_bo.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_bo.h"
+
+#include "kk_device.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+
+#include "util/u_memory.h"
+
+VkResult
+kk_alloc_bo(struct kk_device *dev, struct vk_object_base *log_obj,
+ uint64_t size_B, uint64_t align_B, struct kk_bo **bo_out)
+{
+ VkResult result = VK_SUCCESS;
+
+ // TODO_KOSMICKRISP: Probably requires handling the 256MB maximum buffer size
+ uint64_t minimum_alignment = 0u;
+ mtl_heap_buffer_size_and_align_with_length(dev->mtl_handle, &size_B,
+ &minimum_alignment);
+ minimum_alignment = MAX2(minimum_alignment, align_B);
+ size_B = align64(size_B, minimum_alignment);
+ mtl_heap *handle =
+ mtl_new_heap(dev->mtl_handle, size_B, KK_MTL_RESOURCE_OPTIONS);
+ if (handle == NULL) {
+ result = vk_errorf(log_obj, VK_ERROR_OUT_OF_DEVICE_MEMORY, "%m");
+ goto fail_heap;
+ }
+
+ mtl_buffer *map = mtl_new_buffer_with_length(handle, size_B, 0u);
+ if (map == NULL) {
+ result = vk_errorf(log_obj, VK_ERROR_OUT_OF_DEVICE_MEMORY, "%m");
+ goto fail_map;
+ }
+
+ struct kk_bo *bo = CALLOC_STRUCT(kk_bo);
+
+ if (bo == NULL) {
+ result = vk_errorf(log_obj, VK_ERROR_OUT_OF_HOST_MEMORY, "%m");
+ goto fail_alloc;
+ }
+
+ bo->mtl_handle = handle;
+ bo->size_B = size_B;
+ bo->map = map;
+ bo->gpu = mtl_buffer_get_gpu_address(map);
+ bo->cpu = mtl_get_contents(map);
+
+ *bo_out = bo;
+ return result;
+
+fail_alloc:
+ mtl_release(map);
+fail_map:
+ mtl_release(handle);
+fail_heap:
+ return result;
+}
+
+void
+kk_destroy_bo(struct kk_device *dev, struct kk_bo *bo)
+{
+ mtl_release(bo->map);
+ mtl_release(bo->mtl_handle);
+ FREE(bo);
+}
diff --git a/src/kosmickrisp/vulkan/kk_bo.h b/src/kosmickrisp/vulkan/kk_bo.h
new file mode 100644
index 00000000000..5a97723720b
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_bo.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright © 2025 LunarG, Inc
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_BO_H
+#define KK_BO_H 1
+
+#include "kosmickrisp/bridge/mtl_types.h"
+
+#include "vulkan/vulkan_core.h"
+
+#include <inttypes.h>
+
+struct kk_device;
+struct vk_object_base;
+
+struct kk_bo {
+ mtl_heap *mtl_handle;
+ mtl_buffer *map;
+ uint64_t size_B;
+ uint64_t gpu; // GPU address
+ void *cpu; // CPU address
+};
+
+VkResult kk_alloc_bo(struct kk_device *dev, struct vk_object_base *log_obj,
+ uint64_t size_B, uint64_t align_B, struct kk_bo **bo_out);
+
+void kk_destroy_bo(struct kk_device *dev, struct kk_bo *bo);
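+
+/* Typical usage (illustrative sketch only; the consumer below is hypothetical
+ * and error handling is elided):
+ *
+ *    struct kk_bo *bo = NULL;
+ *    if (kk_alloc_bo(dev, log_obj, size_B, align_B, &bo) == VK_SUCCESS) {
+ *       memcpy(bo->cpu, data, size_B);   // CPU-visible mapping
+ *       consume_gpu_address(bo->gpu);    // hypothetical consumer
+ *       kk_destroy_bo(dev, bo);
+ *    }
+ */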
+
+#endif /* KK_BO_H */
diff --git a/src/kosmickrisp/vulkan/kk_buffer.c b/src/kosmickrisp/vulkan/kk_buffer.c
new file mode 100644
index 00000000000..75c10ab86f9
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_buffer.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_buffer.h"
+
+#include "kk_device.h"
+#include "kk_entrypoints.h"
+#include "kk_physical_device.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+
+static uint64_t
+kk_get_buffer_alignment(const struct kk_physical_device *pdev, uint64_t size,
+ VkBufferUsageFlags2KHR usage_flags,
+ VkBufferCreateFlags create_flags)
+{
+ uint64_t alignment;
+ mtl_heap_buffer_size_and_align_with_length(pdev->mtl_dev_handle, &size,
+ &alignment);
+
+ /** TODO_KOSMICKRISP Metal requires that texel buffers be aligned to the
+ * format they'll use. Since we won't be able to know the format until the
+ * view is created, we should align to the worst case scenario. For this, we
+ * need to request all supported format alignments and take the largest one.
+ */
+ return alignment;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_CreateBuffer(VkDevice device, const VkBufferCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ struct kk_buffer *buffer;
+
+ if (pCreateInfo->size > KK_MAX_BUFFER_SIZE)
+ return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+ buffer =
+ vk_buffer_create(&dev->vk, pCreateInfo, pAllocator, sizeof(*buffer));
+ if (!buffer)
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ *pBuffer = kk_buffer_to_handle(buffer);
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_DestroyBuffer(VkDevice device, VkBuffer _buffer,
+ const VkAllocationCallbacks *pAllocator)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VK_FROM_HANDLE(kk_buffer, buffer, _buffer);
+
+ if (!buffer)
+ return;
+
+ if (buffer->mtl_handle)
+ mtl_release(buffer->mtl_handle);
+
+ vk_buffer_destroy(&dev->vk, pAllocator, &buffer->vk);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetDeviceBufferMemoryRequirements(
+ VkDevice device, const VkDeviceBufferMemoryRequirements *pInfo,
+ VkMemoryRequirements2 *pMemoryRequirements)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ struct kk_physical_device *pdev = kk_device_physical(dev);
+
+ const uint64_t alignment = kk_get_buffer_alignment(
+ pdev, pInfo->pCreateInfo->size, pInfo->pCreateInfo->usage,
+ pInfo->pCreateInfo->flags);
+
+ pMemoryRequirements->memoryRequirements = (VkMemoryRequirements){
+ .size = align64(pInfo->pCreateInfo->size, alignment),
+ .alignment = alignment,
+ .memoryTypeBits = BITFIELD_MASK(pdev->mem_type_count),
+ };
+
+ vk_foreach_struct_const(ext, pMemoryRequirements->pNext) {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
+ VkMemoryDedicatedRequirements *dedicated = (void *)ext;
+ dedicated->prefersDedicatedAllocation = false;
+ dedicated->requiresDedicatedAllocation = false;
+ break;
+ }
+ default:
+ vk_debug_ignored_stype(ext->sType);
+ break;
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetPhysicalDeviceExternalBufferProperties(
+ VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
+ VkExternalBufferProperties *pExternalBufferProperties)
+{
+ /* The Vulkan 1.3.256 spec says:
+ *
+ * VUID-VkPhysicalDeviceExternalBufferInfo-handleType-parameter
+ *
+ * "handleType must be a valid VkExternalMemoryHandleTypeFlagBits value"
+ *
+ * This differs from VkPhysicalDeviceExternalImageFormatInfo, which
+ * surprisingly permits handleType == 0.
+ */
+ assert(pExternalBufferInfo->handleType != 0);
+
+ /* All of the current flags are for sparse which we don't support yet.
+ * Even when we do support it, doing sparse on external memory sounds
+ * sketchy. Also, just disallowing flags is the safe option.
+ */
+ if (pExternalBufferInfo->flags)
+ goto unsupported;
+
+ switch (pExternalBufferInfo->handleType) {
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLHEAP_BIT_EXT:
+ pExternalBufferProperties->externalMemoryProperties =
+ kk_mtlheap_mem_props;
+ return;
+ default:
+ goto unsupported;
+ }
+
+unsupported:
+ /* From the Vulkan 1.3.256 spec:
+ *
+ * compatibleHandleTypes must include at least handleType.
+ */
+ pExternalBufferProperties->externalMemoryProperties =
+ (VkExternalMemoryProperties){
+ .compatibleHandleTypes = pExternalBufferInfo->handleType,
+ };
+}
+
+static VkResult
+kk_bind_buffer_memory(struct kk_device *dev, const VkBindBufferMemoryInfo *info)
+{
+ // Do the actual memory binding
+ VK_FROM_HANDLE(kk_device_memory, mem, info->memory);
+ VK_FROM_HANDLE(kk_buffer, buffer, info->buffer);
+
+ buffer->mtl_handle = mtl_new_buffer_with_length(
+ mem->bo->mtl_handle, buffer->vk.size, info->memoryOffset);
+ buffer->vk.device_address = mtl_buffer_get_gpu_address(buffer->mtl_handle);
+ /* We need Metal to give us a CPU mapping so it correctly captures the
+ * data in the GPU debugger... */
+ mtl_get_contents(buffer->mtl_handle);
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_BindBufferMemory2(VkDevice device, uint32_t bindInfoCount,
+ const VkBindBufferMemoryInfo *pBindInfos)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VkResult first_error_or_success = VK_SUCCESS;
+
+ for (uint32_t i = 0; i < bindInfoCount; ++i) {
+ VkResult result = kk_bind_buffer_memory(dev, &pBindInfos[i]);
+
+ const VkBindMemoryStatusKHR *status =
+ vk_find_struct_const(pBindInfos[i].pNext, BIND_MEMORY_STATUS_KHR);
+ if (status != NULL && status->pResult != NULL)
+ *status->pResult = result;
+
+ if (first_error_or_success == VK_SUCCESS)
+ first_error_or_success = result;
+ }
+
+ return first_error_or_success;
+}
+
+VKAPI_ATTR VkDeviceAddress VKAPI_CALL
+kk_GetBufferDeviceAddress(UNUSED VkDevice device,
+ const VkBufferDeviceAddressInfo *pInfo)
+{
+ VK_FROM_HANDLE(kk_buffer, buffer, pInfo->buffer);
+
+ return vk_buffer_address(&buffer->vk, 0);
+}
+
+VKAPI_ATTR uint64_t VKAPI_CALL
+kk_GetBufferOpaqueCaptureAddress(UNUSED VkDevice device,
+ const VkBufferDeviceAddressInfo *pInfo)
+{
+ VK_FROM_HANDLE(kk_buffer, buffer, pInfo->buffer);
+
+ return vk_buffer_address(&buffer->vk, 0);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_GetBufferOpaqueCaptureDescriptorDataEXT(
+ VkDevice device, const VkBufferCaptureDescriptorDataInfoEXT *pInfo,
+ void *pData)
+{
+ return VK_SUCCESS;
+}
diff --git a/src/kosmickrisp/vulkan/kk_buffer.h b/src/kosmickrisp/vulkan/kk_buffer.h
new file mode 100644
index 00000000000..1d9744678b6
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_buffer.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_BUFFER_H
+#define KK_BUFFER_H 1
+
+#include "kk_device_memory.h"
+#include "kk_private.h"
+
+#include "kosmickrisp/bridge/mtl_types.h"
+
+#include "vk_buffer.h"
+
+struct kk_buffer {
+ struct vk_buffer vk;
+ mtl_buffer *mtl_handle;
+};
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(kk_buffer, vk.base, VkBuffer,
+ VK_OBJECT_TYPE_BUFFER)
+
+static inline struct kk_addr_range
+kk_buffer_addr_range(const struct kk_buffer *buffer, uint64_t offset,
+ uint64_t range)
+{
+ if (buffer == NULL)
+ return (struct kk_addr_range){.range = 0};
+
+ return (struct kk_addr_range){
+ .addr = vk_buffer_address(&buffer->vk, offset),
+ .range = vk_buffer_range(&buffer->vk, offset, range),
+ };
+}
+
+static inline mtl_resource *
+kk_buffer_to_mtl_resource(const struct kk_buffer *buffer)
+{
+ if (buffer != NULL) {
+ return (mtl_resource *)buffer->mtl_handle;
+ }
+ return NULL;
+}
+
+#endif // KK_BUFFER_H
diff --git a/src/kosmickrisp/vulkan/kk_buffer_view.c b/src/kosmickrisp/vulkan/kk_buffer_view.c
new file mode 100644
index 00000000000..0ee011f2f73
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_buffer_view.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_buffer_view.h"
+
+#include "kk_buffer.h"
+#include "kk_device.h"
+#include "kk_entrypoints.h"
+#include "kk_format.h"
+#include "kk_image_layout.h"
+#include "kk_nir_lower_vbo.h"
+#include "kk_physical_device.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+#include "kosmickrisp/bridge/mtl_format.h"
+
+#include "vk_format.h"
+
+VkFormatFeatureFlags2
+kk_get_buffer_format_features(struct kk_physical_device *pdev,
+ VkFormat vk_format)
+{
+ VkFormatFeatureFlags2 features = 0;
+ enum pipe_format p_format = vk_format_to_pipe_format(vk_format);
+
+ if (p_format == PIPE_FORMAT_NONE)
+ return 0;
+
+ const struct kk_va_format *format = kk_get_va_format(p_format);
+ if (format) {
+ if (format->texel_buffer.read)
+ features |= VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT;
+
+ if (format->texel_buffer.write)
+ features |= VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT;
+
+ /* Only these formats allow atomics for texel buffers */
+ if (vk_format == VK_FORMAT_R32_UINT || vk_format == VK_FORMAT_R32_SINT)
+ features |= VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
+ }
+
+ if (kk_vbo_supports_format(p_format))
+ features |= VK_FORMAT_FEATURE_2_VERTEX_BUFFER_BIT;
+
+ return features;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_CreateBufferView(VkDevice _device, const VkBufferViewCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkBufferView *pBufferView)
+{
+ VK_FROM_HANDLE(kk_device, dev, _device);
+ struct kk_buffer_view *view =
+ vk_buffer_view_create(&dev->vk, pCreateInfo, pAllocator, sizeof(*view));
+ if (!view)
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ enum pipe_format p_format = vk_format_to_pipe_format(view->vk.format);
+ const struct kk_va_format *supported_format = kk_get_va_format(p_format);
+
+ /* If we reached here, we support reading at least */
+ enum mtl_texture_usage usage = MTL_TEXTURE_USAGE_SHADER_READ;
+ if (supported_format->texel_buffer.write)
+ usage |= MTL_TEXTURE_USAGE_SHADER_WRITE;
+
+ /* Only these formats allow atomics for texel buffers */
+ if (view->vk.format == VK_FORMAT_R32_UINT ||
+ view->vk.format == VK_FORMAT_R32_SINT)
+ usage |= MTL_TEXTURE_USAGE_SHADER_ATOMIC;
+
+ struct kk_image_layout layout = {
+ .width_px = view->vk.elements,
+ .height_px = 1u,
+ .depth_px = 1u,
+ .layers = 1u,
+ .type = MTL_TEXTURE_TYPE_TEXTURE_BUFFER,
+ .sample_count_sa = 1u,
+ .levels = 1u,
+ .optimized_layout = false,
+ .usage = usage,
+ .format = {.pipe = p_format, .mtl = supported_format->mtl_pixel_format},
+ .swizzle =
+ {
+ .red = supported_format->swizzle.red,
+ .green = supported_format->swizzle.green,
+ .blue = supported_format->swizzle.blue,
+ .alpha = supported_format->swizzle.alpha,
+ },
+ .linear_stride_B = view->vk.range,
+ };
+ struct kk_buffer *buffer =
+ container_of(view->vk.buffer, struct kk_buffer, vk);
+ view->mtl_texel_buffer_handle = mtl_new_texture_with_descriptor_linear(
+ buffer->mtl_handle, &layout, view->vk.offset);
+ if (!view->mtl_texel_buffer_handle) {
+ vk_buffer_view_destroy(&dev->vk, pAllocator, &view->vk);
+ return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+ view->texel_buffer_gpu_id =
+ mtl_texture_get_gpu_resource_id(view->mtl_texel_buffer_handle);
+
+ *pBufferView = kk_buffer_view_to_handle(view);
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
+ const VkAllocationCallbacks *pAllocator)
+{
+ VK_FROM_HANDLE(kk_device, dev, _device);
+ VK_FROM_HANDLE(kk_buffer_view, view, bufferView);
+
+ if (!view)
+ return;
+
+ mtl_release(view->mtl_texel_buffer_handle);
+ vk_buffer_view_destroy(&dev->vk, pAllocator, &view->vk);
+}
diff --git a/src/kosmickrisp/vulkan/kk_buffer_view.h b/src/kosmickrisp/vulkan/kk_buffer_view.h
new file mode 100644
index 00000000000..8525e50b760
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_buffer_view.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_BUFFER_VIEW_H
+#define KK_BUFFER_VIEW_H 1
+
+#include "kk_private.h"
+
+#include "kosmickrisp/bridge/mtl_types.h"
+
+#include "vk_buffer_view.h"
+
+struct kk_physical_device;
+
+VkFormatFeatureFlags2
+kk_get_buffer_format_features(struct kk_physical_device *pdev, VkFormat format);
+
+struct kk_buffer_view {
+ struct vk_buffer_view vk;
+ mtl_texture *mtl_texel_buffer_handle;
+ uint64_t texel_buffer_gpu_id;
+};
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(kk_buffer_view, vk.base, VkBufferView,
+ VK_OBJECT_TYPE_BUFFER_VIEW)
+
+#endif
diff --git a/src/kosmickrisp/vulkan/kk_cmd_buffer.c b/src/kosmickrisp/vulkan/kk_cmd_buffer.c
new file mode 100644
index 00000000000..c4366012a8f
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_cmd_buffer.c
@@ -0,0 +1,533 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_cmd_buffer.h"
+
+#include "kk_buffer.h"
+#include "kk_cmd_pool.h"
+#include "kk_descriptor_set_layout.h"
+#include "kk_encoder.h"
+#include "kk_entrypoints.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+
+#include "vk_alloc.h"
+#include "vk_pipeline_layout.h"
+
+static void
+kk_descriptor_state_fini(struct kk_cmd_buffer *cmd,
+ struct kk_descriptor_state *desc)
+{
+ struct kk_cmd_pool *pool = kk_cmd_buffer_pool(cmd);
+
+ for (unsigned i = 0; i < KK_MAX_SETS; i++) {
+ vk_free(&pool->vk.alloc, desc->push[i]);
+ desc->push[i] = NULL;
+ desc->sets[i] = NULL; /* We also need to set sets to NULL so state doesn't
+ propagate if we reset it */
+ desc->sets_not_resident = 0u;
+ }
+}
+
+void
+kk_cmd_release_resources(struct kk_device *dev, struct kk_cmd_buffer *cmd)
+{
+ kk_cmd_release_dynamic_ds_state(cmd);
+ kk_descriptor_state_fini(cmd, &cmd->state.gfx.descriptors);
+ kk_descriptor_state_fini(cmd, &cmd->state.cs.descriptors);
+
+ /* Release all BOs used as descriptor buffers for submissions */
+ util_dynarray_foreach(&cmd->large_bos, struct kk_bo *, bo) {
+ kk_destroy_bo(dev, *bo);
+ }
+ util_dynarray_clear(&cmd->large_bos);
+}
+
+static void
+kk_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer)
+{
+ struct kk_cmd_buffer *cmd =
+ container_of(vk_cmd_buffer, struct kk_cmd_buffer, vk);
+ struct kk_cmd_pool *pool = kk_cmd_buffer_pool(cmd);
+
+ vk_command_buffer_finish(&cmd->vk);
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+
+ kk_cmd_release_resources(dev, cmd);
+
+ vk_free(&pool->vk.alloc, cmd);
+}
+
+static VkResult
+kk_create_cmd_buffer(struct vk_command_pool *vk_pool,
+ VkCommandBufferLevel level,
+ struct vk_command_buffer **cmd_buffer_out)
+{
+ struct kk_cmd_pool *pool = container_of(vk_pool, struct kk_cmd_pool, vk);
+ struct kk_device *dev = kk_cmd_pool_device(pool);
+ struct kk_cmd_buffer *cmd;
+ VkResult result;
+
+ cmd = vk_zalloc(&pool->vk.alloc, sizeof(*cmd), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (cmd == NULL)
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ result =
+ vk_command_buffer_init(&pool->vk, &cmd->vk, &kk_cmd_buffer_ops, level);
+ if (result != VK_SUCCESS) {
+ vk_free(&pool->vk.alloc, cmd);
+ return result;
+ }
+
+ util_dynarray_init(&cmd->large_bos, NULL);
+
+ cmd->vk.dynamic_graphics_state.vi = &cmd->state.gfx._dynamic_vi;
+ cmd->vk.dynamic_graphics_state.ms.sample_locations =
+ &cmd->state.gfx._dynamic_sl;
+
+ *cmd_buffer_out = &cmd->vk;
+
+ return VK_SUCCESS;
+}
+
+static void
+kk_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer,
+ UNUSED VkCommandBufferResetFlags flags)
+{
+ struct kk_cmd_buffer *cmd =
+ container_of(vk_cmd_buffer, struct kk_cmd_buffer, vk);
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+
+ vk_command_buffer_reset(&cmd->vk);
+ kk_cmd_release_resources(dev, cmd);
+}
+
+const struct vk_command_buffer_ops kk_cmd_buffer_ops = {
+ .create = kk_create_cmd_buffer,
+ .reset = kk_reset_cmd_buffer,
+ .destroy = kk_destroy_cmd_buffer,
+};
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_BeginCommandBuffer(VkCommandBuffer commandBuffer,
+ const VkCommandBufferBeginInfo *pBeginInfo)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+
+ kk_reset_cmd_buffer(&cmd->vk, 0u);
+ vk_command_buffer_begin(&cmd->vk, pBeginInfo);
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_EndCommandBuffer(VkCommandBuffer commandBuffer)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+
+ return vk_command_buffer_end(&cmd->vk);
+}
+
+static bool
+kk_can_ignore_barrier(VkAccessFlags2 access, VkPipelineStageFlags2 stage)
+{
+ if (access == VK_ACCESS_2_NONE || stage == VK_PIPELINE_STAGE_2_NONE)
+ return true;
+
+ const VkAccessFlags2 ignore_access =
+ VK_ACCESS_2_HOST_READ_BIT | VK_ACCESS_2_HOST_WRITE_BIT;
+ const VkPipelineStageFlags2 ignore_stage = VK_PIPELINE_STAGE_2_HOST_BIT;
+ return (!(access ^ ignore_access)) || (!(stage ^ ignore_stage));
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
+ const VkDependencyInfo *pDependencyInfo)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ enum kk_encoder_type last_used = cmd->encoder->main.last_used;
+ kk_encoder_signal_fence_and_end(cmd);
+
+ /* If we were inside a render pass, restart it loading attachments */
+ if (last_used == KK_ENC_RENDER) {
+ struct kk_graphics_state *state = &cmd->state.gfx;
+ assert(state->render_pass_descriptor);
+ kk_encoder_start_render(cmd, state->render_pass_descriptor,
+ state->render.view_mask);
+ kk_cmd_buffer_dirty_all_gfx(cmd);
+ }
+}
+
+static void
+kk_bind_descriptor_sets(struct kk_descriptor_state *desc,
+ const VkBindDescriptorSetsInfoKHR *info)
+{
+ VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);
+
+ /* From the Vulkan 1.3.275 spec:
+ *
+ * "When binding a descriptor set (see Descriptor Set Binding) to
+ * set number N...
+ *
+ * If, additionally, the previously bound descriptor set for set
+ * N was bound using a pipeline layout not compatible for set N,
+ * then all bindings in sets numbered greater than N are
+ * disturbed."
+ *
+ * This means that, if some earlier set gets bound in such a way that
+ * it changes set_dynamic_buffer_start[s], this binding is implicitly
+ * invalidated. Therefore, we can always look at the current value
+ * of set_dynamic_buffer_start[s] as the base of our dynamic buffer
+ * range and it's only our responsibility to adjust all
+ * set_dynamic_buffer_start[p] for p > s as needed.
+ */
+ uint8_t dyn_buffer_start =
+ desc->root.set_dynamic_buffer_start[info->firstSet];
+
+ uint32_t next_dyn_offset = 0;
+ for (uint32_t i = 0; i < info->descriptorSetCount; ++i) {
+ unsigned s = i + info->firstSet;
+ VK_FROM_HANDLE(kk_descriptor_set, set, info->pDescriptorSets[i]);
+
+ if (desc->sets[s] != set) {
+ if (set != NULL) {
+ desc->root.sets[s] = set->addr;
+ desc->set_sizes[s] = set->size;
+ } else {
+ desc->root.sets[s] = 0;
+ desc->set_sizes[s] = 0;
+ }
+ desc->sets[s] = set;
+
+ desc->sets_not_resident |= BITFIELD_BIT(s);
+
+ /* Binding descriptors invalidates push descriptors */
+ desc->push_dirty &= ~BITFIELD_BIT(s);
+ }
+
+ if (pipeline_layout->set_layouts[s] != NULL) {
+ const struct kk_descriptor_set_layout *set_layout =
+ vk_to_kk_descriptor_set_layout(pipeline_layout->set_layouts[s]);
+
+ if (set != NULL && set_layout->dynamic_buffer_count > 0) {
+ for (uint32_t j = 0; j < set_layout->dynamic_buffer_count; j++) {
+ struct kk_buffer_address addr = set->dynamic_buffers[j];
+ addr.base_addr += info->pDynamicOffsets[next_dyn_offset + j];
+ desc->root.dynamic_buffers[dyn_buffer_start + j] = addr;
+ }
+ next_dyn_offset += set->layout->dynamic_buffer_count;
+ }
+
+ dyn_buffer_start += set_layout->dynamic_buffer_count;
+ } else {
+ assert(set == NULL);
+ }
+ }
+ assert(dyn_buffer_start <= KK_MAX_DYNAMIC_BUFFERS);
+ assert(next_dyn_offset <= info->dynamicOffsetCount);
+
+ for (uint32_t s = info->firstSet + info->descriptorSetCount; s < KK_MAX_SETS;
+ s++)
+ desc->root.set_dynamic_buffer_start[s] = dyn_buffer_start;
+
+ desc->root_dirty = true;
+}
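+
+/* Illustration (not from the spec text above): binding three set layouts with
+ * dynamic-buffer counts {1, 2, 0} at firstSet = 0 writes set 0's dynamic
+ * buffer to root.dynamic_buffers[0], set 1's to [1] and [2], and then sets
+ * set_dynamic_buffer_start[s] = 3 for every s >= 3.
+ */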
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdBindDescriptorSets2KHR(
+ VkCommandBuffer commandBuffer,
+ const VkBindDescriptorSetsInfoKHR *pBindDescriptorSetsInfo)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+
+ if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
+ kk_bind_descriptor_sets(&cmd->state.gfx.descriptors,
+ pBindDescriptorSetsInfo);
+ }
+
+ if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
+ kk_bind_descriptor_sets(&cmd->state.cs.descriptors,
+ pBindDescriptorSetsInfo);
+ }
+}
+
+static struct kk_push_descriptor_set *
+kk_cmd_push_descriptors(struct kk_cmd_buffer *cmd,
+ struct kk_descriptor_state *desc,
+ struct kk_descriptor_set_layout *set_layout,
+ uint32_t set)
+{
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+ assert(set < KK_MAX_SETS);
+ if (unlikely(desc->push[set] == NULL)) {
+ size_t size = sizeof(*desc->push[set]) +
+ (sizeof(mtl_resource *) * set_layout->descriptor_count);
+ desc->push[set] = vk_zalloc(&cmd->vk.pool->alloc, size, 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (unlikely(desc->push[set] == NULL)) {
+ vk_command_buffer_set_error(&cmd->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
+ return NULL;
+ }
+ desc->push[set]->layout = set_layout;
+ for (uint32_t i = 0u; i < set_layout->descriptor_count; ++i)
+ desc->push[set]->mtl_resources[i] = dev->null_descriptor->map;
+ }
+
+ /* Pushing descriptors replaces whatever sets are bound */
+ desc->sets[set] = NULL;
+ desc->push_dirty |= BITFIELD_BIT(set);
+ desc->sets_not_resident |= BITFIELD_BIT(set);
+
+ return desc->push[set];
+}
+
+static void
+kk_push_descriptor_set(struct kk_cmd_buffer *cmd,
+ struct kk_descriptor_state *desc,
+ const VkPushDescriptorSetInfoKHR *info)
+{
+ VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout);
+
+ struct kk_descriptor_set_layout *set_layout =
+ vk_to_kk_descriptor_set_layout(pipeline_layout->set_layouts[info->set]);
+
+ struct kk_push_descriptor_set *push_set =
+ kk_cmd_push_descriptors(cmd, desc, set_layout, info->set);
+ if (unlikely(push_set == NULL))
+ return;
+
+ kk_push_descriptor_set_update(push_set, info->descriptorWriteCount,
+ info->pDescriptorWrites);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdPushDescriptorSet2KHR(
+ VkCommandBuffer commandBuffer,
+ const VkPushDescriptorSetInfoKHR *pPushDescriptorSetInfo)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+
+ if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
+ kk_push_descriptor_set(cmd, &cmd->state.gfx.descriptors,
+ pPushDescriptorSetInfo);
+ }
+
+ if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
+ kk_push_descriptor_set(cmd, &cmd->state.cs.descriptors,
+ pPushDescriptorSetInfo);
+ }
+}
+
+static void
+kk_push_constants(UNUSED struct kk_cmd_buffer *cmd,
+ struct kk_descriptor_state *desc,
+ const VkPushConstantsInfoKHR *info)
+{
+ memcpy(desc->root.push + info->offset, info->pValues, info->size);
+ desc->root_dirty = true;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdPushConstants2KHR(VkCommandBuffer commandBuffer,
+ const VkPushConstantsInfoKHR *pPushConstantsInfo)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+
+ if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS)
+ kk_push_constants(cmd, &cmd->state.gfx.descriptors, pPushConstantsInfo);
+
+ if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT)
+ kk_push_constants(cmd, &cmd->state.cs.descriptors, pPushConstantsInfo);
+}
+
+void
+kk_cmd_buffer_write_descriptor_buffer(struct kk_cmd_buffer *cmd,
+ struct kk_descriptor_state *desc,
+ size_t size, size_t offset)
+{
+ assert(size + offset <= sizeof(desc->root.sets));
+
+ struct kk_bo *root_buffer = desc->root.root_buffer;
+
+ memcpy(root_buffer->cpu, (uint8_t *)desc->root.sets + offset, size);
+}
+
+void
+kk_cmd_release_dynamic_ds_state(struct kk_cmd_buffer *cmd)
+{
+ if (cmd->state.gfx.is_depth_stencil_dynamic &&
+ cmd->state.gfx.depth_stencil_state)
+ mtl_release(cmd->state.gfx.depth_stencil_state);
+ cmd->state.gfx.depth_stencil_state = NULL;
+}
+
+struct kk_bo *
+kk_cmd_allocate_buffer(struct kk_cmd_buffer *cmd, size_t size_B,
+ size_t alignment_B)
+{
+ struct kk_bo *buffer = NULL;
+
+ VkResult result = kk_alloc_bo(kk_cmd_buffer_device(cmd), &cmd->vk.base,
+ size_B, alignment_B, &buffer);
+ if (result != VK_SUCCESS) {
+ vk_command_buffer_set_error(&cmd->vk, result);
+ return NULL;
+ }
+ util_dynarray_append(&cmd->large_bos, struct kk_bo *, buffer);
+
+ return buffer;
+}
+
+struct kk_pool
+kk_pool_upload(struct kk_cmd_buffer *cmd, void *data, size_t size_B,
+ size_t alignment_B)
+{
+ struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, size_B, alignment_B);
+ if (!bo)
+ return (struct kk_pool){};
+
+ memcpy(bo->cpu, data, size_B);
+ struct kk_pool pool = {.handle = bo->map, .gpu = bo->gpu, .cpu = bo->cpu};
+
+ return pool;
+}
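+
+/* Usage sketch (illustrative; `params` is a hypothetical host struct):
+ *
+ *    struct kk_pool p = kk_pool_upload(cmd, &params, sizeof(params), 8);
+ *    // p.gpu is the GPU address for shaders, p.cpu the host mapping, and
+ *    // p.handle the mtl_buffer to make resident. The backing BO is freed
+ *    // with the command buffer's other large BOs.
+ */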
+
+uint64_t
+kk_upload_descriptor_root(struct kk_cmd_buffer *cmd,
+ VkPipelineBindPoint bind_point)
+{
+ struct kk_descriptor_state *desc = kk_get_descriptors_state(cmd, bind_point);
+ struct kk_root_descriptor_table *root = &desc->root;
+ struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, sizeof(*root), 8u);
+ if (bo == NULL)
+ return 0u;
+
+ memcpy(bo->cpu, root, sizeof(*root));
+ root->root_buffer = bo;
+
+ return bo->gpu;
+}
+
+void
+kk_cmd_buffer_flush_push_descriptors(struct kk_cmd_buffer *cmd,
+ struct kk_descriptor_state *desc)
+{
+ u_foreach_bit(set_idx, desc->push_dirty) {
+ struct kk_push_descriptor_set *push_set = desc->push[set_idx];
+ struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, sizeof(push_set->data),
+ KK_MIN_UBO_ALIGNMENT);
+ if (bo == NULL)
+ return;
+
+ memcpy(bo->cpu, push_set->data, sizeof(push_set->data));
+ push_set->mtl_descriptor_buffer = bo->map;
+ desc->root.sets[set_idx] = bo->gpu;
+ desc->set_sizes[set_idx] = sizeof(push_set->data);
+ }
+
+ desc->root_dirty = true;
+ desc->push_dirty = 0;
+}
+
+static void
+kk_make_graphics_descriptor_resources_resident(struct kk_cmd_buffer *cmd)
+{
+ struct kk_descriptor_state *desc = &cmd->state.gfx.descriptors;
+ mtl_render_encoder *encoder = kk_render_encoder(cmd);
+ /* Make resources resident as required by Metal */
+ u_foreach_bit(set_index, desc->sets_not_resident) {
+ mtl_resource *descriptor_buffer = NULL;
+
+ /* If we have no set, it means it was a push set */
+ if (desc->sets[set_index]) {
+ struct kk_descriptor_set *set = desc->sets[set_index];
+ descriptor_buffer = set->mtl_descriptor_buffer;
+ } else {
+ struct kk_push_descriptor_set *push_set = desc->push[set_index];
+ descriptor_buffer = push_set->mtl_descriptor_buffer;
+ }
+
+ /* We could have empty descriptor sets for some reason... */
+ if (descriptor_buffer) {
+ mtl_render_use_resource(encoder, descriptor_buffer,
+ MTL_RESOURCE_USAGE_READ);
+ }
+ }
+
+ desc->sets_not_resident = 0u;
+}
+
+static void
+kk_make_compute_descriptor_resources_resident(struct kk_cmd_buffer *cmd)
+{
+ struct kk_descriptor_state *desc = &cmd->state.cs.descriptors;
+ mtl_compute_encoder *encoder = kk_compute_encoder(cmd);
+ u_foreach_bit(set_index, desc->sets_not_resident) {
+ /* Make resources resident as required by Metal */
+ mtl_resource *descriptor_buffer = NULL;
+ if (desc->sets[set_index]) {
+ struct kk_descriptor_set *set = desc->sets[set_index];
+ descriptor_buffer = set->mtl_descriptor_buffer;
+ } else {
+ struct kk_push_descriptor_set *push_set = desc->push[set_index];
+ descriptor_buffer = push_set->mtl_descriptor_buffer;
+ }
+
+ /* We could have empty descriptor sets for some reason... */
+ if (descriptor_buffer) {
+ mtl_compute_use_resource(encoder, descriptor_buffer,
+ MTL_RESOURCE_USAGE_READ);
+ }
+ }
+
+ desc->sets_not_resident = 0u;
+}
+
+void
+kk_make_descriptor_resources_resident(struct kk_cmd_buffer *cmd,
+ VkPipelineBindPoint bind_point)
+{
+ if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS)
+ kk_make_graphics_descriptor_resources_resident(cmd);
+ else if (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE)
+ kk_make_compute_descriptor_resources_resident(cmd);
+}
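+
+/* One plausible per-draw/dispatch ordering of the helpers above (sketch, not
+ * prescribed by this file): flush dirty push sets first so their descriptor
+ * buffers exist, then make the descriptor buffers resident, then upload the
+ * root table once it is final:
+ *
+ *    kk_cmd_buffer_flush_push_descriptors(cmd, desc);
+ *    kk_make_descriptor_resources_resident(cmd, bind_point);
+ *    uint64_t root = kk_upload_descriptor_root(cmd, bind_point);
+ */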
+
+void
+kk_cmd_write(struct kk_cmd_buffer *cmd, mtl_buffer *buffer, uint64_t addr,
+ uint64_t value)
+{
+ util_dynarray_append(&cmd->encoder->imm_writes, uint64_t, addr);
+ util_dynarray_append(&cmd->encoder->imm_writes, uint64_t, value);
+ util_dynarray_append(&cmd->encoder->resident_buffers, mtl_buffer *, buffer);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdPushDescriptorSetWithTemplate2KHR(
+ VkCommandBuffer commandBuffer, const VkPushDescriptorSetWithTemplateInfoKHR
+ *pPushDescriptorSetWithTemplateInfo)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(vk_descriptor_update_template, template,
+ pPushDescriptorSetWithTemplateInfo->descriptorUpdateTemplate);
+ VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout,
+ pPushDescriptorSetWithTemplateInfo->layout);
+
+ struct kk_descriptor_state *desc =
+ kk_get_descriptors_state(cmd, template->bind_point);
+ struct kk_descriptor_set_layout *set_layout = vk_to_kk_descriptor_set_layout(
+ pipeline_layout->set_layouts[pPushDescriptorSetWithTemplateInfo->set]);
+ struct kk_push_descriptor_set *push_set = kk_cmd_push_descriptors(
+ cmd, desc, set_layout, pPushDescriptorSetWithTemplateInfo->set);
+ if (unlikely(push_set == NULL))
+ return;
+
+ kk_push_descriptor_set_update_template(
+ push_set, set_layout, template,
+ pPushDescriptorSetWithTemplateInfo->pData);
+}
diff --git a/src/kosmickrisp/vulkan/kk_cmd_buffer.h b/src/kosmickrisp/vulkan/kk_cmd_buffer.h
new file mode 100644
index 00000000000..83d91f4e0b2
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_cmd_buffer.h
@@ -0,0 +1,270 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_CMD_BUFFER_H
+#define KK_CMD_BUFFER_H 1
+
+#include "kk_private.h"
+
+#include "kk_descriptor_set.h"
+#include "kk_image.h"
+#include "kk_nir_lower_vbo.h"
+#include "kk_shader.h"
+
+#include "kosmickrisp/bridge/mtl_types.h"
+
+#include "util/u_dynarray.h"
+
+#include "vk_command_buffer.h"
+
+#include <stdio.h>
+
+struct kk_query_pool;
+
+struct kk_root_descriptor_table {
+ struct kk_bo *root_buffer;
+
+ union {
+ struct {
+ /* Vertex input state */
+ uint32_t buffer_strides[KK_MAX_VBUFS];
+ uint64_t attrib_base[KK_MAX_ATTRIBS];
+ uint32_t attrib_clamps[KK_MAX_ATTRIBS];
+ float blend_constant[4];
+ } draw;
+ struct {
+ uint32_t base_group[3];
+ } cs;
+ };
+
+ /* Client push constants */
+ uint8_t push[KK_MAX_PUSH_SIZE];
+
+ /* Descriptor set base addresses */
+ uint64_t sets[KK_MAX_SETS];
+
+ /* Dynamic buffer bindings */
+ struct kk_buffer_address dynamic_buffers[KK_MAX_DYNAMIC_BUFFERS];
+
+ /* Start index in dynamic_buffers where each set starts */
+ uint8_t set_dynamic_buffer_start[KK_MAX_SETS];
+};
+
+struct kk_descriptor_state {
+ bool root_dirty;
+ struct kk_root_descriptor_table root;
+
+ uint32_t set_sizes[KK_MAX_SETS];
+ struct kk_descriptor_set *sets[KK_MAX_SETS];
+ mtl_resource **resources[KK_MAX_SETS];
+ /* Non-resident sets can be either regular sets or push sets. If
+ * sets[index] == NULL, then push[index] != NULL. */
+ uint32_t sets_not_resident;
+
+ uint32_t push_dirty;
+ struct kk_push_descriptor_set *push[KK_MAX_SETS];
+};
+
+struct kk_attachment {
+ VkFormat vk_format;
+ struct kk_image_view *iview;
+
+ VkResolveModeFlagBits resolve_mode;
+ struct kk_image_view *resolve_iview;
+
+ /* Needed to track the value of storeOp in case we need to copy images for
+ * the DRM_FORMAT_MOD_LINEAR case */
+ VkAttachmentStoreOp store_op;
+};
+
+struct kk_rendering_state {
+ VkRenderingFlagBits flags;
+
+ VkRect2D area;
+ uint32_t layer_count;
+ uint32_t view_mask;
+ uint32_t samples;
+
+ uint32_t color_att_count;
+ struct kk_attachment color_att[KK_MAX_RTS];
+ struct kk_attachment depth_att;
+ struct kk_attachment stencil_att;
+ struct kk_attachment fsr_att;
+};
+
+/* Dirty tracking bits for state not tracked by vk_dynamic_graphics_state or
+ * shaders_dirty.
+ */
+enum kk_dirty {
+ KK_DIRTY_INDEX = BITFIELD_BIT(0),
+ KK_DIRTY_VB = BITFIELD_BIT(1),
+ KK_DIRTY_OCCLUSION = BITFIELD_BIT(2),
+ KK_DIRTY_PROVOKING = BITFIELD_BIT(3),
+ KK_DIRTY_VARYINGS = BITFIELD_BIT(4),
+ KK_DIRTY_PIPELINE = BITFIELD_BIT(5),
+};
+
+struct kk_graphics_state {
+ struct kk_rendering_state render;
+ struct kk_descriptor_state descriptors;
+
+ mtl_render_pipeline_state *pipeline_state;
+ mtl_depth_stencil_state *depth_stencil_state;
+ mtl_render_pass_descriptor *render_pass_descriptor;
+ bool is_depth_stencil_dynamic;
+ bool is_cull_front_and_back;
+ bool restart_disabled;
+
+ enum mtl_primitive_type primitive_type;
+ enum mesa_prim prim;
+ enum kk_dirty dirty;
+
+ struct {
+ enum mtl_visibility_result_mode mode;
+
+ /* If enabled, index of the current occlusion query in the occlusion heap.
+ * There can only be one active at a time (hardware constraint).
+ */
+ uint16_t index;
+ } occlusion;
+
+ /* Index buffer */
+ struct {
+ mtl_buffer *handle;
+ uint32_t size;
+ uint32_t offset;
+ uint32_t restart;
+ uint8_t bytes_per_index;
+ } index;
+
+ /* Vertex buffers */
+ struct {
+ struct kk_addr_range addr_range[KK_MAX_VBUFS];
+ mtl_buffer *handles[KK_MAX_VBUFS];
+ uint32_t attribs_read;
+ /* Required to compute the maximum index buffer size when the primitive
+ * topology is a triangle fan */
+ uint32_t max_vertices;
+ } vb;
+
+ /* Needed by vk_command_buffer::dynamic_graphics_state */
+ struct vk_vertex_input_state _dynamic_vi;
+ struct vk_sample_locations_state _dynamic_sl;
+};
+
+struct kk_compute_state {
+ struct kk_descriptor_state descriptors;
+ mtl_compute_pipeline_state *pipeline_state;
+ struct mtl_size local_size;
+ enum kk_dirty dirty;
+};
+
+struct kk_encoder;
+
+struct kk_cmd_buffer {
+ struct vk_command_buffer vk;
+
+ struct kk_encoder *encoder;
+ void *drawable;
+
+ struct {
+ struct kk_graphics_state gfx;
+ struct kk_compute_state cs;
+ } state;
+
+ /* Owned large BOs */
+ struct util_dynarray large_bos;
+};
+
+VK_DEFINE_HANDLE_CASTS(kk_cmd_buffer, vk.base, VkCommandBuffer,
+ VK_OBJECT_TYPE_COMMAND_BUFFER)
+
+extern const struct vk_command_buffer_ops kk_cmd_buffer_ops;
+
+static inline struct kk_device *
+kk_cmd_buffer_device(struct kk_cmd_buffer *cmd)
+{
+ return (struct kk_device *)cmd->vk.base.device;
+}
+
+static inline struct kk_cmd_pool *
+kk_cmd_buffer_pool(struct kk_cmd_buffer *cmd)
+{
+ return (struct kk_cmd_pool *)cmd->vk.pool;
+}
+
+static inline struct kk_descriptor_state *
+kk_get_descriptors_state(struct kk_cmd_buffer *cmd,
+ VkPipelineBindPoint bind_point)
+{
+ switch (bind_point) {
+ case VK_PIPELINE_BIND_POINT_GRAPHICS:
+ return &cmd->state.gfx.descriptors;
+ case VK_PIPELINE_BIND_POINT_COMPUTE:
+ return &cmd->state.cs.descriptors;
+ default:
+ UNREACHABLE("Unhandled bind point");
+ }
+}
+
+void kk_cmd_release_resources(struct kk_device *dev, struct kk_cmd_buffer *cmd);
+
+static inline void
+kk_cmd_buffer_dirty_all_gfx(struct kk_cmd_buffer *cmd)
+{
+ /* Ensure we flush all graphics state */
+ vk_dynamic_graphics_state_dirty_all(&cmd->vk.dynamic_graphics_state);
+ cmd->state.gfx.dirty = ~0u;
+}
+
+void kk_cmd_release_dynamic_ds_state(struct kk_cmd_buffer *cmd);
+
+mtl_depth_stencil_state *
+kk_compile_depth_stencil_state(struct kk_device *device,
+ const struct vk_depth_stencil_state *ds,
+ bool has_depth, bool has_stencil);
+
+void kk_meta_resolve_rendering(struct kk_cmd_buffer *cmd,
+ const VkRenderingInfo *pRenderingInfo);
+
+void kk_cmd_buffer_write_descriptor_buffer(struct kk_cmd_buffer *cmd,
+ struct kk_descriptor_state *desc,
+ size_t size, size_t offset);
+
+/* Allocates a temporary buffer that will be released once the command buffer
+ * has completed */
+struct kk_bo *kk_cmd_allocate_buffer(struct kk_cmd_buffer *cmd, size_t size_B,
+ size_t alignment_B);
+
+struct kk_pool {
+ mtl_buffer *handle;
+ uint64_t gpu;
+ void *cpu;
+};
+struct kk_pool kk_pool_upload(struct kk_cmd_buffer *cmd, void *data,
+ size_t size_B, size_t alignment_B);
+
+uint64_t kk_upload_descriptor_root(struct kk_cmd_buffer *cmd,
+ VkPipelineBindPoint bind_point);
+
+void kk_cmd_buffer_flush_push_descriptors(struct kk_cmd_buffer *cmd,
+ struct kk_descriptor_state *desc);
+
+void kk_make_descriptor_resources_resident(struct kk_cmd_buffer *cmd,
+ VkPipelineBindPoint bind_point);
+
+void kk_cmd_write(struct kk_cmd_buffer *cmd, mtl_buffer *buffer, uint64_t addr,
+ uint64_t value);
+
+void kk_cmd_dispatch_pipeline(struct kk_cmd_buffer *cmd,
+ mtl_compute_encoder *encoder,
+ mtl_compute_pipeline_state *pipeline,
+ const void *push_data, size_t push_size,
+ uint32_t groupCountX, uint32_t groupCountY,
+ uint32_t groupCountZ);
+
+#endif
diff --git a/src/kosmickrisp/vulkan/kk_cmd_clear.c b/src/kosmickrisp/vulkan/kk_cmd_clear.c
new file mode 100644
index 00000000000..2f5e418d1ae
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_cmd_clear.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright 2024 Valve Corporation
+ * Copyright 2024 Alyssa Rosenzweig
+ * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+#include "kk_cmd_buffer.h"
+
+#include "kk_device.h"
+#include "kk_entrypoints.h"
+#include "kk_image.h"
+#include "kk_image_view.h"
+#include "kk_physical_device.h"
+
+#include "vk_format.h"
+#include "vk_meta.h"
+
+static VkImageViewType
+render_view_type(VkImageType image_type, unsigned layer_count)
+{
+ switch (image_type) {
+ case VK_IMAGE_TYPE_1D:
+ return layer_count == 1 ? VK_IMAGE_VIEW_TYPE_1D
+ : VK_IMAGE_VIEW_TYPE_1D_ARRAY;
+ case VK_IMAGE_TYPE_2D:
+ return layer_count == 1 ? VK_IMAGE_VIEW_TYPE_2D
+ : VK_IMAGE_VIEW_TYPE_2D_ARRAY;
+ case VK_IMAGE_TYPE_3D:
+ return VK_IMAGE_VIEW_TYPE_3D;
+ default:
+ UNREACHABLE("Invalid image type");
+ }
+}
+
+static void
+clear_image(struct kk_cmd_buffer *cmd, struct kk_image *image,
+ VkImageLayout image_layout, VkFormat format,
+ const VkClearValue *clear_value, uint32_t range_count,
+ const VkImageSubresourceRange *ranges)
+{
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+ ASSERTED VkResult result;
+
+ for (uint32_t r = 0; r < range_count; r++) {
+ const uint32_t level_count =
+ vk_image_subresource_level_count(&image->vk, &ranges[r]);
+
+ for (uint32_t l = 0; l < level_count; l++) {
+ const uint32_t level = ranges[r].baseMipLevel + l;
+
+ const VkExtent3D level_extent =
+ vk_image_mip_level_extent(&image->vk, level);
+
+ uint32_t base_array_layer, layer_count;
+ if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
+ base_array_layer = 0;
+ layer_count = level_extent.depth;
+ } else {
+ base_array_layer = ranges[r].baseArrayLayer;
+ layer_count =
+ vk_image_subresource_layer_count(&image->vk, &ranges[r]);
+ }
+
+ const VkImageViewUsageCreateInfo view_usage_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
+ .usage = (ranges[r].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
+ ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
+ : VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+ };
+ const VkImageViewCreateInfo view_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
+ .pNext = &view_usage_info,
+ .image = kk_image_to_handle(image),
+ .viewType = render_view_type(image->vk.image_type, layer_count),
+ .format = format,
+ .subresourceRange =
+ {
+ .aspectMask = image->vk.aspects,
+ .baseMipLevel = level,
+ .levelCount = 1,
+ .baseArrayLayer = base_array_layer,
+ .layerCount = layer_count,
+ },
+ };
+
+ /* We use vk_meta_create_image_view here for lifetime management */
+ VkImageView view;
+ result =
+ vk_meta_create_image_view(&cmd->vk, &dev->meta, &view_info, &view);
+ assert(result == VK_SUCCESS);
+
+ VkRenderingInfo render = {
+ .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
+ .renderArea =
+ {
+ .offset = {0, 0},
+ .extent = {level_extent.width, level_extent.height},
+ },
+ .layerCount = layer_count,
+ };
+
+ VkRenderingAttachmentInfo vk_att = {
+ .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
+ .imageView = view,
+ .imageLayout = image_layout,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .clearValue = *clear_value,
+ };
+
+ if (ranges[r].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+ render.colorAttachmentCount = 1;
+ render.pColorAttachments = &vk_att;
+ }
+ if (ranges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
+ render.pDepthAttachment = &vk_att;
+ if (ranges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
+ render.pStencilAttachment = &vk_att;
+
+ kk_CmdBeginRendering(kk_cmd_buffer_to_handle(cmd), &render);
+ kk_CmdEndRendering(kk_cmd_buffer_to_handle(cmd));
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage _image,
+ VkImageLayout imageLayout,
+ const VkClearColorValue *pColor, uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_image, image, _image);
+
+ VkClearValue clear_value = {
+ .color = *pColor,
+ };
+
+ VkFormat vk_format = image->vk.format;
+ if (vk_format == VK_FORMAT_R64_UINT || vk_format == VK_FORMAT_R64_SINT)
+ vk_format = VK_FORMAT_R32G32_UINT;
+
+ enum pipe_format p_format = vk_format_to_pipe_format(vk_format);
+ assert(p_format != PIPE_FORMAT_NONE);
+
+ clear_image(cmd, image, imageLayout, vk_format, &clear_value, rangeCount,
+ pRanges);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage _image,
+ VkImageLayout imageLayout,
+ const VkClearDepthStencilValue *pDepthStencil,
+ uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_image, image, _image);
+
+ const VkClearValue clear_value = {
+ .depthStencil = *pDepthStencil,
+ };
+
+ clear_image(cmd, image, imageLayout, image->vk.format, &clear_value,
+ rangeCount, pRanges);
+}
diff --git a/src/kosmickrisp/vulkan/kk_cmd_copy.c b/src/kosmickrisp/vulkan/kk_cmd_copy.c
new file mode 100644
index 00000000000..32b1b5af359
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_cmd_copy.c
@@ -0,0 +1,355 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_cmd_buffer.h"
+
+#include "kk_bo.h"
+#include "kk_buffer.h"
+#include "kk_device.h"
+#include "kk_encoder.h"
+#include "kk_entrypoints.h"
+#include "kk_physical_device.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+#include "kosmickrisp/bridge/vk_to_mtl_map.h"
+
+#include "util/format/u_format.h"
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
+ const VkCopyBufferInfo2 *pCopyBufferInfo)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_buffer, src, pCopyBufferInfo->srcBuffer);
+ VK_FROM_HANDLE(kk_buffer, dst, pCopyBufferInfo->dstBuffer);
+
+ mtl_blit_encoder *blit = kk_blit_encoder(cmd);
+ for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
+ const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[i];
+ mtl_copy_from_buffer_to_buffer(blit, src->mtl_handle, region->srcOffset,
+ dst->mtl_handle, region->dstOffset,
+ region->size);
+ }
+}
+
+struct kk_buffer_image_copy_info {
+ struct mtl_buffer_image_copy mtl_data;
+ size_t buffer_slice_size_B;
+};
+
+static struct kk_buffer_image_copy_info
+vk_buffer_image_copy_to_mtl_buffer_image_copy(
+ const VkBufferImageCopy2 *region, const struct kk_image_plane *plane)
+{
+ struct kk_buffer_image_copy_info copy;
+ enum pipe_format p_format = plane->layout.format.pipe;
+ if (region->imageSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) {
+ copy.mtl_data.options = MTL_BLIT_OPTION_DEPTH_FROM_DEPTH_STENCIL;
+ p_format = util_format_get_depth_only(p_format);
+ } else if (region->imageSubresource.aspectMask ==
+ VK_IMAGE_ASPECT_STENCIL_BIT) {
+ copy.mtl_data.options = MTL_BLIT_OPTION_STENCIL_FROM_DEPTH_STENCIL;
+ p_format = PIPE_FORMAT_S8_UINT;
+ } else
+ copy.mtl_data.options = MTL_BLIT_OPTION_NONE;
+
+ const uint32_t buffer_width = region->bufferRowLength
+ ? region->bufferRowLength
+ : region->imageExtent.width;
+ const uint32_t buffer_height = region->bufferImageHeight
+ ? region->bufferImageHeight
+ : region->imageExtent.height;
+
+ const uint32_t buffer_stride_B =
+ util_format_get_stride(p_format, buffer_width);
+ const uint32_t buffer_size_2d_B =
+ util_format_get_2d_size(p_format, buffer_stride_B, buffer_height);
+
+ /* Metal requires this value to be 0 for 2D images; for 3D textures it is
+ * the number of bytes between consecutive 2D slices. */
+ copy.mtl_data.buffer_2d_image_size_B =
+ plane->layout.depth_px == 1u ? 0u : buffer_size_2d_B;
+ copy.mtl_data.buffer_stride_B = buffer_stride_B;
+ copy.mtl_data.image_size = vk_extent_3d_to_mtl_size(&region->imageExtent);
+ copy.mtl_data.image_origin =
+ vk_offset_3d_to_mtl_origin(&region->imageOffset);
+ copy.mtl_data.image_level = region->imageSubresource.mipLevel;
+ copy.buffer_slice_size_B = buffer_size_2d_B;
+
+ return copy;
+}
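+
+/* Example (illustration): a tightly packed copy (bufferRowLength =
+ * bufferImageHeight = 0) of a 64x64 VK_FORMAT_R8G8B8A8_UNORM region gives
+ * buffer_stride_B = 64 * 4 = 256 and buffer_size_2d_B = 256 * 64 = 16384,
+ * which is also the buffer_slice_size_B used below to step between layers.
+ */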
+
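+/* Iterates `ndx` over the array layers selected by region->subresource_member,
+ * i.e. [baseArrayLayer, baseArrayLayer + layerCount). */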
+#define kk_foreach_slice(ndx, image, subresource_member) \
+ for (uint32_t ndx = region->subresource_member.baseArrayLayer; \
+ ndx < (region->subresource_member.baseArrayLayer + \
+ vk_image_subresource_layer_count(&image->vk, \
+ &region->subresource_member)); \
+ ++ndx)
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer,
+ const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_buffer, buffer, pCopyBufferToImageInfo->srcBuffer);
+ VK_FROM_HANDLE(kk_image, image, pCopyBufferToImageInfo->dstImage);
+
+ mtl_blit_encoder *blit = kk_blit_encoder(cmd);
+ for (int r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
+ const VkBufferImageCopy2 *region = &pCopyBufferToImageInfo->pRegions[r];
+ const uint8_t plane_index = kk_image_memory_aspects_to_plane(
+ image, region->imageSubresource.aspectMask);
+ struct kk_image_plane *plane = &image->planes[plane_index];
+ struct kk_buffer_image_copy_info info =
+ vk_buffer_image_copy_to_mtl_buffer_image_copy(region, plane);
+ info.mtl_data.buffer = buffer->mtl_handle;
+ info.mtl_data.image = plane->mtl_handle;
+ size_t buffer_offset = region->bufferOffset;
+
+ kk_foreach_slice(slice, image, imageSubresource)
+ {
+ info.mtl_data.image_slice = slice;
+ info.mtl_data.buffer_offset_B = buffer_offset;
+ mtl_copy_from_buffer_to_texture(blit, &info.mtl_data);
+ buffer_offset += info.buffer_slice_size_B;
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer,
+ const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_image, image, pCopyImageToBufferInfo->srcImage);
+ VK_FROM_HANDLE(kk_buffer, buffer, pCopyImageToBufferInfo->dstBuffer);
+
+ mtl_blit_encoder *blit = kk_blit_encoder(cmd);
+ for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
+ const VkBufferImageCopy2 *region = &pCopyImageToBufferInfo->pRegions[r];
+ const uint8_t plane_index = kk_image_memory_aspects_to_plane(
+ image, region->imageSubresource.aspectMask);
+ struct kk_image_plane *plane = &image->planes[plane_index];
+ struct kk_buffer_image_copy_info info =
+ vk_buffer_image_copy_to_mtl_buffer_image_copy(region, plane);
+ info.mtl_data.buffer = buffer->mtl_handle;
+ info.mtl_data.image = plane->mtl_handle;
+ size_t buffer_offset = region->bufferOffset;
+
+ kk_foreach_slice(slice, image, imageSubresource)
+ {
+ info.mtl_data.image_slice = slice;
+ info.mtl_data.buffer_offset_B = buffer_offset;
+ mtl_copy_from_texture_to_buffer(blit, &info.mtl_data);
+ buffer_offset += info.buffer_slice_size_B;
+ }
+ }
+}
+
+struct copy_image_data {
+ struct kk_cmd_buffer *cmd;
+ struct kk_image *src;
+ struct kk_image *dst;
+ const VkImageCopy2 *regions;
+ uint32_t plane_index;
+ uint32_t region_count;
+};
+
+/* Copies images by doing a texture->buffer->texture transfer. This is required
+ * for compressed formats */
+static void
+copy_through_buffer(struct copy_image_data *data)
+{
+ struct kk_image *src = data->src;
+ struct kk_image *dst = data->dst;
+ struct kk_image_plane *src_plane = &src->planes[data->plane_index];
+ struct kk_image_plane *dst_plane = &dst->planes[data->plane_index];
+ enum pipe_format src_format = src_plane->layout.format.pipe;
+ enum pipe_format dst_format = dst_plane->layout.format.pipe;
+ bool is_src_compressed = util_format_is_compressed(src_format);
+ bool is_dst_compressed = util_format_is_compressed(dst_format);
+ /* We shouldn't do any depth/stencil through this path */
+ assert(!util_format_is_depth_or_stencil(src_format) ||
+ !util_format_is_depth_or_stencil(dst_format));
+ mtl_blit_encoder *blit = kk_blit_encoder(data->cmd);
+
+ size_t buffer_size = 0u;
+ for (unsigned r = 0; r < data->region_count; r++) {
+ const VkImageCopy2 *region = &data->regions[r];
+ const uint32_t buffer_stride_B =
+ util_format_get_stride(src_format, region->extent.width);
+ const uint32_t buffer_size_2d_B = util_format_get_2d_size(
+ src_format, buffer_stride_B, region->extent.height);
+ const uint32_t layer_count =
+ vk_image_subresource_layer_count(&src->vk, &region->srcSubresource);
+ buffer_size += buffer_size_2d_B * layer_count;
+ }
+ struct kk_bo *bo = kk_cmd_allocate_buffer(data->cmd, buffer_size, 8);
+
+ size_t buffer_offset = 0u;
+ for (unsigned r = 0; r < data->region_count; r++) {
+ const VkImageCopy2 *region = &data->regions[r];
+ uint32_t mip_level = region->srcSubresource.mipLevel;
+ const uint32_t mip_width =
+ u_minify(src_plane->layout.width_px, mip_level);
+ const uint32_t mip_height =
+ u_minify(src_plane->layout.height_px, mip_level);
+ const uint32_t stride_B = util_format_get_stride(src_format, mip_width);
+ const uint32_t size_2d_B =
+ util_format_get_2d_size(src_format, stride_B, mip_height);
+ const uint32_t buffer_stride_B =
+ util_format_get_stride(src_format, region->extent.width);
+ const uint32_t buffer_size_2d_B = util_format_get_2d_size(
+ src_format, buffer_stride_B, region->extent.height);
+
+ struct kk_buffer_image_copy_info info;
+
+ /* Metal requires this value to be 0 for 2D images; for 3D textures it is
+ * the number of bytes between consecutive 2D slices. */
+ info.mtl_data.buffer_2d_image_size_B =
+ src_plane->layout.depth_px == 1u ? 0u : size_2d_B;
+ info.mtl_data.buffer_stride_B = buffer_stride_B;
+ info.mtl_data.image_level = mip_level;
+ info.mtl_data.buffer = bo->map;
+ info.mtl_data.options = MTL_BLIT_OPTION_NONE;
+ info.buffer_slice_size_B = buffer_size_2d_B;
+ struct mtl_size src_size = vk_extent_3d_to_mtl_size(&region->extent);
+ struct mtl_size dst_size = vk_extent_3d_to_mtl_size(&region->extent);
+ /* Need to adjust size to block dimensions */
+ if (is_src_compressed) {
+ dst_size.x /= util_format_get_blockwidth(src_format);
+ dst_size.y /= util_format_get_blockheight(src_format);
+ dst_size.z /= util_format_get_blockdepth(src_format);
+ }
+ if (is_dst_compressed) {
+ dst_size.x *= util_format_get_blockwidth(dst_format);
+ dst_size.y *= util_format_get_blockheight(dst_format);
+ dst_size.z *= util_format_get_blockdepth(dst_format);
+ }
+ struct mtl_origin src_origin =
+ vk_offset_3d_to_mtl_origin(&region->srcOffset);
+ struct mtl_origin dst_origin =
+ vk_offset_3d_to_mtl_origin(&region->dstOffset);
+
+ /* Texture->Buffer->Texture */
+ // TODO_KOSMICKRISP We don't handle 3D to 2D array nor vice-versa in this
+ // path. Unsure if it's even needed, can compressed textures be 3D?
+ kk_foreach_slice(slice, src, srcSubresource)
+ {
+ info.mtl_data.image = src_plane->mtl_handle;
+ info.mtl_data.image_size = src_size;
+ info.mtl_data.image_origin = src_origin;
+ info.mtl_data.image_slice = slice;
+ info.mtl_data.buffer_offset_B = buffer_offset;
+ mtl_copy_from_texture_to_buffer(blit, &info.mtl_data);
+
+ info.mtl_data.image = dst_plane->mtl_handle;
+ info.mtl_data.image_size = dst_size;
+ info.mtl_data.image_origin = dst_origin;
+ mtl_copy_from_buffer_to_texture(blit, &info.mtl_data);
+
+ buffer_offset += info.buffer_slice_size_B;
+ }
+ }
+}
+
+/* Copies images through Metal's texture->texture copy mechanism */
+static void
+copy_image(struct copy_image_data *data)
+{
+ mtl_blit_encoder *blit = kk_blit_encoder(data->cmd);
+ for (unsigned r = 0; r < data->region_count; r++) {
+ const VkImageCopy2 *region = &data->regions[r];
+ uint8_t src_plane_index = kk_image_aspects_to_plane(
+ data->src, region->srcSubresource.aspectMask);
+ if (data->plane_index != src_plane_index)
+ continue;
+
+ uint8_t dst_plane_index = kk_image_aspects_to_plane(
+ data->dst, region->dstSubresource.aspectMask);
+ struct kk_image *src = data->src;
+ struct kk_image *dst = data->dst;
+ struct kk_image_plane *src_plane = &src->planes[src_plane_index];
+ struct kk_image_plane *dst_plane = &dst->planes[dst_plane_index];
+
+ /* From the Vulkan 1.3.217 spec:
+ *
+ * "When copying between compressed and uncompressed formats the
+ * extent members represent the texel dimensions of the source image
+ * and not the destination."
+ */
+ const VkExtent3D extent_px =
+ vk_image_sanitize_extent(&src->vk, region->extent);
+
+ size_t src_slice = region->srcSubresource.baseArrayLayer;
+ size_t src_level = region->srcSubresource.mipLevel;
+ struct mtl_origin src_origin =
+ vk_offset_3d_to_mtl_origin(&region->srcOffset);
+ struct mtl_size size = {.x = extent_px.width,
+ .y = extent_px.height,
+ .z = extent_px.depth};
+ size_t dst_slice = region->dstSubresource.baseArrayLayer;
+ size_t dst_level = region->dstSubresource.mipLevel;
+ struct mtl_origin dst_origin =
+ vk_offset_3d_to_mtl_origin(&region->dstOffset);
+
+      /* When copying 3D to 2D layered or vice versa, we change the 3D size to a
+       * 2D one and iterate over the layer count of the 2D image (which equals
+       * the depth of the 3D image), adjusting origin and slice accordingly. */
+ uint32_t layer_count =
+ vk_image_subresource_layer_count(&src->vk, &region->srcSubresource);
+ const uint32_t dst_layer_count =
+ vk_image_subresource_layer_count(&dst->vk, &region->dstSubresource);
+ size_t *src_increase = &src_slice;
+ size_t *dst_increase = &dst_slice;
+
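+      /* By default we advance the array slices; the 3D<->2D cases below
+       * retarget one of these pointers at the corresponding Z origin instead. */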
+ if (layer_count < dst_layer_count) { /* 3D to 2D layered */
+ layer_count = dst_layer_count;
+ src_increase = &src_origin.z;
+ size.z = 1u;
+ } else if (dst_layer_count < layer_count) { /* 2D layered to 3D */
+ dst_increase = &dst_origin.z;
+ size.z = 1u;
+ }
+ for (uint32_t l = 0; l < layer_count;
+ ++l, ++(*src_increase), ++(*dst_increase)) {
+ mtl_copy_from_texture_to_texture(
+ blit, src_plane->mtl_handle, src_slice, src_level, src_origin, size,
+ dst_plane->mtl_handle, dst_slice, dst_level, dst_origin);
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdCopyImage2(VkCommandBuffer commandBuffer,
+ const VkCopyImageInfo2 *pCopyImageInfo)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_image, src, pCopyImageInfo->srcImage);
+ VK_FROM_HANDLE(kk_image, dst, pCopyImageInfo->dstImage);
+
+ for (uint32_t i = 0u; i < src->plane_count; ++i) {
+ struct kk_image_plane *src_plane = &src->planes[i];
+ struct kk_image_plane *dst_plane = &dst->planes[i];
+ enum pipe_format src_format = src_plane->layout.format.pipe;
+ enum pipe_format dst_format = dst_plane->layout.format.pipe;
+ struct copy_image_data data = {
+ .cmd = cmd,
+ .src = src,
+ .dst = dst,
+ .regions = pCopyImageInfo->pRegions,
+ .plane_index = i,
+ .region_count = pCopyImageInfo->regionCount,
+ };
+ bool is_src_compressed = util_format_is_compressed(src_format);
+ bool is_dst_compressed = util_format_is_compressed(dst_format);
+ if (src_format != dst_format && (is_src_compressed || is_dst_compressed))
+ copy_through_buffer(&data);
+ else
+ copy_image(&data);
+ }
+}
diff --git a/src/kosmickrisp/vulkan/kk_cmd_dispatch.c b/src/kosmickrisp/vulkan/kk_cmd_dispatch.c
new file mode 100644
index 00000000000..338766ef12d
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_cmd_dispatch.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright © 2025 LunarG, Inc
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "vulkan/vulkan_core.h"
+
+#include "kk_buffer.h"
+#include "kk_cmd_buffer.h"
+#include "kk_descriptor_set_layout.h"
+#include "kk_device.h"
+#include "kk_encoder.h"
+#include "kk_entrypoints.h"
+#include "kk_shader.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+
+#include "vk_common_entrypoints.h"
+
+void
+kk_cmd_dispatch_pipeline(struct kk_cmd_buffer *cmd,
+ mtl_compute_encoder *encoder,
+ mtl_compute_pipeline_state *pipeline,
+ const void *push_data, size_t push_size,
+ uint32_t groupCountX, uint32_t groupCountY,
+ uint32_t groupCountZ)
+{
+ struct kk_root_descriptor_table *root = NULL;
+ struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, sizeof(*root), 8u);
+ /* kk_cmd_allocate_buffer already sets the error, we can just exit */
+ if (!bo)
+ return;
+
+ root = bo->cpu;
+ assert(push_size <= sizeof(root->push));
+ memcpy(root->push, push_data, push_size);
+ root->cs.base_group[0] = 1; /* TODO_KOSMICKRISP This is hard-coded because we
+ know this is the size we create them with */
+ root->cs.base_group[1] = 1;
+ root->cs.base_group[2] = 1;
+
+ mtl_compute_set_buffer(encoder, bo->map, 0, 0);
+ mtl_compute_set_pipeline_state(encoder, pipeline);
+
+ struct mtl_size grid_size = {
+ .x = groupCountX,
+ .y = groupCountY,
+ .z = groupCountZ,
+ };
+ struct mtl_size local_size = {
+ .x = 1,
+ .y = 1,
+ .z = 1,
+ };
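+   /* With a 1x1x1 threadgroup size, dispatching groupCount threads launches
+    * exactly one thread per requested workgroup. */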
+ mtl_dispatch_threads(encoder, grid_size, local_size);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t groupCountX,
+ uint32_t groupCountY, uint32_t groupCountZ)
+{
+ kk_CmdDispatchBase(commandBuffer, 0, 0, 0, groupCountX, groupCountY,
+ groupCountZ);
+}
+
+static void
+kk_flush_compute_state(struct kk_cmd_buffer *cmd)
+{
+ mtl_compute_encoder *enc = kk_compute_encoder(cmd);
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+
+ // Fill Metal argument buffer with descriptor set addresses
+ struct kk_descriptor_state *desc = &cmd->state.cs.descriptors;
+
+ if (desc->push_dirty)
+ kk_cmd_buffer_flush_push_descriptors(cmd, desc);
+   /* This must happen after the push descriptors' buffers are created;
+    * otherwise the buffer backing them would not exist yet and could not be
+    * made resident. */
+ if (desc->sets_not_resident)
+ kk_make_descriptor_resources_resident(cmd,
+ VK_PIPELINE_BIND_POINT_COMPUTE);
+ if (desc->root_dirty)
+ kk_upload_descriptor_root(cmd, VK_PIPELINE_BIND_POINT_COMPUTE);
+
+ /* Make user allocated heaps resident */
+ simple_mtx_lock(&dev->user_heap_cache.mutex);
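+   /* The cached hash lets us skip re-issuing useHeaps when the set of user
+    * heaps has not changed since the last flush. */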
+ if (cmd->encoder->main.user_heap_hash != dev->user_heap_cache.hash) {
+ cmd->encoder->main.user_heap_hash = dev->user_heap_cache.hash;
+ mtl_heap **heaps = util_dynarray_begin(&dev->user_heap_cache.handles);
+ uint32_t count =
+ util_dynarray_num_elements(&dev->user_heap_cache.handles, mtl_heap *);
+ mtl_compute_use_heaps(enc, heaps, count);
+ }
+ simple_mtx_unlock(&dev->user_heap_cache.mutex);
+
+ struct kk_bo *root_buffer = desc->root.root_buffer;
+ if (root_buffer)
+ mtl_compute_set_buffer(enc, root_buffer->map, 0, 0);
+
+ mtl_compute_set_pipeline_state(enc, cmd->state.cs.pipeline_state);
+ cmd->state.cs.dirty = 0u;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t baseGroupX,
+ uint32_t baseGroupY, uint32_t baseGroupZ,
+ uint32_t groupCountX, uint32_t groupCountY,
+ uint32_t groupCountZ)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+
+ struct kk_descriptor_state *desc = &cmd->state.cs.descriptors;
+ desc->root_dirty |= desc->root.cs.base_group[0] != baseGroupX;
+ desc->root_dirty |= desc->root.cs.base_group[1] != baseGroupY;
+ desc->root_dirty |= desc->root.cs.base_group[2] != baseGroupZ;
+ desc->root.cs.base_group[0] = baseGroupX;
+ desc->root.cs.base_group[1] = baseGroupY;
+ desc->root.cs.base_group[2] = baseGroupZ;
+
+ kk_flush_compute_state(cmd);
+
+ struct mtl_size grid_size = {
+ .x = groupCountX,
+ .y = groupCountY,
+ .z = groupCountZ,
+ };
+ mtl_compute_encoder *enc = kk_compute_encoder(cmd);
+ mtl_dispatch_threads(enc, grid_size, cmd->state.cs.local_size);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer,
+ VkDeviceSize offset)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_buffer, buffer, _buffer);
+
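+   /* Indirect dispatches have no base workgroup, so reset the root table's
+    * base_group and flag the root dirty if it changed. */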
+ struct kk_descriptor_state *desc = &cmd->state.cs.descriptors;
+ desc->root_dirty |= desc->root.cs.base_group[0] != 0;
+ desc->root_dirty |= desc->root.cs.base_group[1] != 0;
+ desc->root_dirty |= desc->root.cs.base_group[2] != 0;
+ desc->root.cs.base_group[0] = 0;
+ desc->root.cs.base_group[1] = 0;
+ desc->root.cs.base_group[2] = 0;
+
+ kk_flush_compute_state(cmd);
+
+ mtl_compute_encoder *enc = kk_compute_encoder(cmd);
+ mtl_dispatch_threadgroups_with_indirect_buffer(
+ enc, buffer->mtl_handle, offset, cmd->state.cs.local_size);
+}
diff --git a/src/kosmickrisp/vulkan/kk_cmd_draw.c b/src/kosmickrisp/vulkan/kk_cmd_draw.c
new file mode 100644
index 00000000000..84008e68af6
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_cmd_draw.c
@@ -0,0 +1,1010 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2024 Valve Corporation
+ * Copyright 2024 Alyssa Rosenzweig
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_entrypoints.h"
+
+#include "kk_buffer.h"
+#include "kk_cmd_buffer.h"
+#include "kk_encoder.h"
+#include "kk_format.h"
+#include "kk_image_view.h"
+#include "kk_query_pool.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+#include "kosmickrisp/bridge/vk_to_mtl_map.h"
+
+#include "vulkan/util/vk_format.h"
+
+static void
+kk_cmd_buffer_dirty_render_pass(struct kk_cmd_buffer *cmd)
+{
+ struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
+
+ /* These depend on color attachment count */
+ BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES);
+ BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES);
+ BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS);
+ BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS);
+
+ /* These depend on the depth/stencil format */
+ BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE);
+ BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE);
+ BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE);
+ BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE);
+
+ /* This may depend on render targets for ESO */
+ BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES);
+
+ /* This may depend on render targets */
+ BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP);
+}
+
+static void
+kk_attachment_init(struct kk_attachment *att,
+ const VkRenderingAttachmentInfo *info)
+{
+ if (info == NULL || info->imageView == VK_NULL_HANDLE) {
+ *att = (struct kk_attachment){
+ .iview = NULL,
+ };
+ return;
+ }
+
+ VK_FROM_HANDLE(kk_image_view, iview, info->imageView);
+ *att = (struct kk_attachment){
+ .vk_format = iview->vk.format,
+ .iview = iview,
+ };
+
+ if (info->resolveMode != VK_RESOLVE_MODE_NONE) {
+ VK_FROM_HANDLE(kk_image_view, res_iview, info->resolveImageView);
+ att->resolve_mode = info->resolveMode;
+ att->resolve_iview = res_iview;
+ }
+
+ att->store_op = info->storeOp;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetRenderingAreaGranularityKHR(
+ VkDevice device, const VkRenderingAreaInfoKHR *pRenderingAreaInfo,
+ VkExtent2D *pGranularity)
+{
+ *pGranularity = (VkExtent2D){.width = 1, .height = 1};
+}
+
+static void
+kk_merge_render_iview(VkExtent2D *extent, struct kk_image_view *iview)
+{
+ if (iview) {
+ /* TODO: is this right for ycbcr? */
+ unsigned level = iview->vk.base_mip_level;
+ unsigned width = u_minify(iview->vk.image->extent.width, level);
+ unsigned height = u_minify(iview->vk.image->extent.height, level);
+
+ extent->width = MAX2(extent->width, width);
+ extent->height = MAX2(extent->height, height);
+ }
+}
+
+static void
+kk_fill_common_attachment_description(
+ mtl_render_pass_attachment_descriptor *descriptor,
+ const struct kk_image_view *iview, const VkRenderingAttachmentInfo *info,
+ bool force_attachment_load)
+{
+ assert(iview->plane_count ==
+ 1); /* TODO_KOSMICKRISP Handle multiplanar images? */
+ mtl_render_pass_attachment_descriptor_set_texture(
+ descriptor, iview->planes[0].mtl_handle_render);
+ mtl_render_pass_attachment_descriptor_set_level(descriptor,
+ iview->vk.base_mip_level);
+ mtl_render_pass_attachment_descriptor_set_slice(descriptor,
+ iview->vk.base_array_layer);
+ enum mtl_load_action load_action =
+ force_attachment_load
+ ? MTL_LOAD_ACTION_LOAD
+ : vk_attachment_load_op_to_mtl_load_action(info->loadOp);
+ mtl_render_pass_attachment_descriptor_set_load_action(descriptor,
+ load_action);
+   /* We need to force the attachment store action to correctly handle
+    * situations where the attachment is written in one subpass and read in the
+    * next one while the store operation is something other than store. The
+    * other reason is that we split render passes when a pipeline barrier is
+    * used, so we must not lose the attachment contents when we restart it. */
+ enum mtl_store_action store_action = MTL_STORE_ACTION_STORE;
+ mtl_render_pass_attachment_descriptor_set_store_action(descriptor,
+ store_action);
+}
+
+static struct mtl_clear_color
+vk_clear_color_value_to_mtl_clear_color(union VkClearColorValue color,
+ enum pipe_format format)
+{
+ struct mtl_clear_color value;
+ if (util_format_is_pure_sint(format)) {
+ value.red = color.int32[0];
+ value.green = color.int32[1];
+ value.blue = color.int32[2];
+ value.alpha = color.int32[3];
+ } else if (util_format_is_pure_uint(format)) {
+ value.red = color.uint32[0];
+ value.green = color.uint32[1];
+ value.blue = color.uint32[2];
+ value.alpha = color.uint32[3];
+ } else {
+ value.red = color.float32[0];
+ value.green = color.float32[1];
+ value.blue = color.float32[2];
+ value.alpha = color.float32[3];
+ }
+
+ /* Apply swizzle to color since Metal does not allow swizzle for renderable
+ * textures, but we need to support that for formats like
+ * VK_FORMAT_B4G4R4A4_UNORM_PACK16 */
+ const struct kk_va_format *supported_format = kk_get_va_format(format);
+ struct mtl_clear_color swizzled_color;
+ for (uint32_t i = 0u; i < 4; ++i)
+ swizzled_color.channel[i] =
+ value.channel[supported_format->swizzle.channels[i]];
+
+ return swizzled_color;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdBeginRendering(VkCommandBuffer commandBuffer,
+ const VkRenderingInfo *pRenderingInfo)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+
+ struct kk_rendering_state *render = &cmd->state.gfx.render;
+
+ memset(render, 0, sizeof(*render));
+
+ render->flags = pRenderingInfo->flags;
+ render->area = pRenderingInfo->renderArea;
+ render->view_mask = pRenderingInfo->viewMask;
+ render->layer_count = pRenderingInfo->layerCount;
+ render->samples = 0;
+ render->color_att_count = pRenderingInfo->colorAttachmentCount;
+
+ const uint32_t layer_count = render->view_mask
+ ? util_last_bit(render->view_mask)
+ : render->layer_count;
+
+ VkExtent2D framebuffer_extent = {.width = 0u, .height = 0u};
+ bool does_any_attachment_clear = false;
+ for (uint32_t i = 0; i < render->color_att_count; i++) {
+ kk_attachment_init(&render->color_att[i],
+ &pRenderingInfo->pColorAttachments[i]);
+ kk_merge_render_iview(&framebuffer_extent, render->color_att[i].iview);
+ does_any_attachment_clear |=
+ (pRenderingInfo->pColorAttachments[i].loadOp ==
+ VK_ATTACHMENT_LOAD_OP_CLEAR);
+ }
+ if (pRenderingInfo->pDepthAttachment)
+ does_any_attachment_clear |= (pRenderingInfo->pDepthAttachment->loadOp ==
+ VK_ATTACHMENT_LOAD_OP_CLEAR);
+ if (pRenderingInfo->pStencilAttachment)
+ does_any_attachment_clear |=
+ (pRenderingInfo->pStencilAttachment->loadOp ==
+ VK_ATTACHMENT_LOAD_OP_CLEAR);
+
+ kk_attachment_init(&render->depth_att, pRenderingInfo->pDepthAttachment);
+ kk_attachment_init(&render->stencil_att, pRenderingInfo->pStencilAttachment);
+ kk_merge_render_iview(&framebuffer_extent,
+ render->depth_att.iview ?: render->stencil_att.iview);
+
+ const VkRenderingFragmentShadingRateAttachmentInfoKHR *fsr_att_info =
+ vk_find_struct_const(pRenderingInfo->pNext,
+ RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR);
+ if (fsr_att_info != NULL && fsr_att_info->imageView != VK_NULL_HANDLE) {
+ VK_FROM_HANDLE(kk_image_view, iview, fsr_att_info->imageView);
+ render->fsr_att = (struct kk_attachment){
+ .vk_format = iview->vk.format,
+ .iview = iview,
+ .store_op = VK_ATTACHMENT_STORE_OP_NONE,
+ };
+ }
+
+ const VkRenderingAttachmentLocationInfoKHR ral_info = {
+ .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_LOCATION_INFO_KHR,
+ .colorAttachmentCount = pRenderingInfo->colorAttachmentCount,
+ };
+ vk_cmd_set_rendering_attachment_locations(&cmd->vk, &ral_info);
+
+ kk_cmd_buffer_dirty_render_pass(cmd);
+ mtl_render_pass_descriptor *pass_descriptor =
+ mtl_new_render_pass_descriptor();
+
+   /* Framebufferless rendering: we need to set the pass descriptor's
+    * renderTargetWidth/Height to non-zero values as well as
+    * defaultRasterSampleCount. */
+ if (framebuffer_extent.width == 0u && framebuffer_extent.height == 0u) {
+ framebuffer_extent.width = render->area.extent.width;
+ framebuffer_extent.height = render->area.extent.height;
+ mtl_render_pass_descriptor_set_render_target_width(
+ pass_descriptor, framebuffer_extent.width);
+ mtl_render_pass_descriptor_set_render_target_height(
+ pass_descriptor, framebuffer_extent.height);
+ mtl_render_pass_descriptor_set_default_raster_sample_count(
+ pass_descriptor, 1u);
+ }
+
+   /* Check whether we are rendering to the whole framebuffer. Required to
+    * decide whether we must force a load, since a clear load action would
+    * otherwise clear the entire attachment rather than just the render area.
+    */
+ bool is_whole_framebuffer =
+ framebuffer_extent.width == render->area.extent.width &&
+ framebuffer_extent.height == render->area.extent.height &&
+ render->area.offset.x == 0u && render->area.offset.y == 0u &&
+ (render->view_mask == 0u ||
+ render->view_mask == BITFIELD64_MASK(render->layer_count));
+
+   /* Determine whether the render area is tile aligned so we know whether we
+    * actually need to load the tile contents to avoid losing information. */
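+   /* 31 is used as an alignment mask, i.e. we assume a 32-pixel tile
+    * granularity. */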
+ uint32_t tile_alignment = 31u;
+ bool is_tile_aligned = !(render->area.offset.x & tile_alignment) &&
+ !(render->area.offset.y & tile_alignment) &&
+ !(render->area.extent.width & tile_alignment) &&
+ !(render->area.extent.height & tile_alignment);
+
+ /* Rendering to the whole framebuffer */
+ is_tile_aligned |= is_whole_framebuffer;
+
+ /* There are 3 cases where we need to force a load instead of using the user
+ * defined load operation:
+ * 1. Render area is not tile aligned
+ * 2. Load operation is clear but doesn't render to the whole attachment
+ * 3. Resuming renderpass
+ */
+ bool force_attachment_load =
+ !is_tile_aligned ||
+ (!is_whole_framebuffer && does_any_attachment_clear) ||
+ (render->flags & VK_RENDERING_RESUMING_BIT);
+
+ for (uint32_t i = 0; i < render->color_att_count; i++) {
+ const struct kk_image_view *iview = render->color_att[i].iview;
+ if (!iview)
+ continue;
+
+ assert(iview->plane_count ==
+ 1); /* TODO_KOSMICKRISP Handle multiplanar images? */
+ const struct kk_image *image =
+ container_of(iview->vk.image, struct kk_image, vk);
+ render->samples = image->vk.samples;
+
+ mtl_render_pass_attachment_descriptor *attachment_descriptor =
+ mtl_render_pass_descriptor_get_color_attachment(pass_descriptor, i);
+ kk_fill_common_attachment_description(
+ attachment_descriptor, iview, &pRenderingInfo->pColorAttachments[i],
+ force_attachment_load);
+ struct mtl_clear_color clear_color =
+ vk_clear_color_value_to_mtl_clear_color(
+ pRenderingInfo->pColorAttachments[i].clearValue.color,
+ iview->planes[0].format);
+ mtl_render_pass_attachment_descriptor_set_clear_color(
+ attachment_descriptor, clear_color);
+ }
+
+ if (render->depth_att.iview) {
+ const struct kk_image_view *iview = render->depth_att.iview;
+ const struct kk_image *image =
+ container_of(iview->vk.image, struct kk_image, vk);
+ render->samples = image->vk.samples;
+
+ mtl_render_pass_attachment_descriptor *attachment_descriptor =
+ mtl_render_pass_descriptor_get_depth_attachment(pass_descriptor);
+ kk_fill_common_attachment_description(
+ attachment_descriptor, render->depth_att.iview,
+ pRenderingInfo->pDepthAttachment, force_attachment_load);
+ mtl_render_pass_attachment_descriptor_set_clear_depth(
+ attachment_descriptor,
+ pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth);
+ }
+ if (render->stencil_att.iview) {
+ const struct kk_image_view *iview = render->stencil_att.iview;
+ const struct kk_image *image =
+ container_of(iview->vk.image, struct kk_image, vk);
+ render->samples = image->vk.samples;
+
+ mtl_render_pass_attachment_descriptor *attachment_descriptor =
+ mtl_render_pass_descriptor_get_stencil_attachment(pass_descriptor);
+ kk_fill_common_attachment_description(
+ attachment_descriptor, render->stencil_att.iview,
+ pRenderingInfo->pStencilAttachment, force_attachment_load);
+ mtl_render_pass_attachment_descriptor_set_clear_stencil(
+ attachment_descriptor,
+ pRenderingInfo->pStencilAttachment->clearValue.depthStencil.stencil);
+ }
+
+ /* Render targets are always arrays */
+ mtl_render_pass_descriptor_set_render_target_array_length(
+ pass_descriptor, layer_count ? layer_count : 1u);
+
+ /* Set global visibility buffer */
+ mtl_render_pass_descriptor_set_visibility_buffer(
+ pass_descriptor, dev->occlusion_queries.bo->map);
+
+ // TODO_KOSMICKRISP Fragment shading rate support goes here if Metal supports
+ // it
+
+ /* Start new encoder and encode sync commands from previous barriers (aka
+ * fences) */
+ kk_encoder_start_render(cmd, pass_descriptor, render->view_mask);
+
+   /* Store the descriptor in case we need to restart the pass at a pipeline
+    * barrier, but force loads so the restarted pass keeps the attachment
+    * contents. */
+ for (uint32_t i = 0; i < render->color_att_count; i++) {
+ const struct kk_image_view *iview = render->color_att[i].iview;
+ if (!iview)
+ continue;
+ mtl_render_pass_attachment_descriptor *attachment_descriptor =
+ mtl_render_pass_descriptor_get_color_attachment(pass_descriptor, i);
+ mtl_render_pass_attachment_descriptor_set_load_action(
+ attachment_descriptor, MTL_LOAD_ACTION_LOAD);
+ }
+ if (render->depth_att.iview) {
+ mtl_render_pass_attachment_descriptor *attachment_descriptor =
+ mtl_render_pass_descriptor_get_depth_attachment(pass_descriptor);
+ mtl_render_pass_attachment_descriptor_set_load_action(
+ attachment_descriptor, MTL_LOAD_ACTION_LOAD);
+ }
+ if (render->stencil_att.iview) {
+ mtl_render_pass_attachment_descriptor *attachment_descriptor =
+ mtl_render_pass_descriptor_get_stencil_attachment(pass_descriptor);
+ mtl_render_pass_attachment_descriptor_set_load_action(
+ attachment_descriptor, MTL_LOAD_ACTION_LOAD);
+ }
+ cmd->state.gfx.render_pass_descriptor = pass_descriptor;
+
+ kk_cmd_buffer_dirty_all_gfx(cmd);
+
+ if (render->flags & VK_RENDERING_RESUMING_BIT)
+ return;
+
+ /* Clear attachments if we forced a load and there's a clear */
+ if (!force_attachment_load || !does_any_attachment_clear)
+ return;
+
+ uint32_t clear_count = 0;
+ VkClearAttachment clear_att[KK_MAX_RTS + 1];
+ for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
+ const VkRenderingAttachmentInfo *att_info =
+ &pRenderingInfo->pColorAttachments[i];
+ if (att_info->imageView == VK_NULL_HANDLE ||
+ att_info->loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR)
+ continue;
+
+ clear_att[clear_count++] = (VkClearAttachment){
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .colorAttachment = i,
+ .clearValue = att_info->clearValue,
+ };
+ }
+
+ clear_att[clear_count] = (VkClearAttachment){
+ .aspectMask = 0,
+ };
+ if (pRenderingInfo->pDepthAttachment != NULL &&
+ pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE &&
+ pRenderingInfo->pDepthAttachment->loadOp ==
+ VK_ATTACHMENT_LOAD_OP_CLEAR) {
+ clear_att[clear_count].aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
+ clear_att[clear_count].clearValue.depthStencil.depth =
+ pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth;
+ }
+ if (pRenderingInfo->pStencilAttachment != NULL &&
+ pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE &&
+ pRenderingInfo->pStencilAttachment->loadOp ==
+ VK_ATTACHMENT_LOAD_OP_CLEAR) {
+ clear_att[clear_count].aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
+ clear_att[clear_count].clearValue.depthStencil.stencil =
+ pRenderingInfo->pStencilAttachment->clearValue.depthStencil.stencil;
+ }
+ if (clear_att[clear_count].aspectMask != 0)
+ clear_count++;
+
+ if (clear_count > 0) {
+ const VkClearRect clear_rect = {
+ .rect = render->area,
+ .baseArrayLayer = 0,
+ .layerCount = render->view_mask ? 1 : render->layer_count,
+ };
+
+ kk_CmdClearAttachments(kk_cmd_buffer_to_handle(cmd), clear_count,
+ clear_att, 1, &clear_rect);
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdEndRendering(VkCommandBuffer commandBuffer)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ struct kk_rendering_state *render = &cmd->state.gfx.render;
+ bool need_resolve = false;
+
+ /* Translate render state back to VK for meta */
+ VkRenderingAttachmentInfo vk_color_att[KK_MAX_RTS];
+ for (uint32_t i = 0; i < render->color_att_count; i++) {
+ if (render->color_att[i].resolve_mode != VK_RESOLVE_MODE_NONE)
+ need_resolve = true;
+
+ vk_color_att[i] = (VkRenderingAttachmentInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
+ .imageView = kk_image_view_to_handle(render->color_att[i].iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .resolveMode = render->color_att[i].resolve_mode,
+ .resolveImageView =
+ kk_image_view_to_handle(render->color_att[i].resolve_iview),
+ .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+ }
+
+ const VkRenderingAttachmentInfo vk_depth_att = {
+ .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
+ .imageView = kk_image_view_to_handle(render->depth_att.iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .resolveMode = render->depth_att.resolve_mode,
+ .resolveImageView =
+ kk_image_view_to_handle(render->depth_att.resolve_iview),
+ .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+ if (render->depth_att.resolve_mode != VK_RESOLVE_MODE_NONE)
+ need_resolve = true;
+
+ const VkRenderingAttachmentInfo vk_stencil_att = {
+ .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
+ .imageView = kk_image_view_to_handle(render->stencil_att.iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .resolveMode = render->stencil_att.resolve_mode,
+ .resolveImageView =
+ kk_image_view_to_handle(render->stencil_att.resolve_iview),
+ .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ };
+ if (render->stencil_att.resolve_mode != VK_RESOLVE_MODE_NONE)
+ need_resolve = true;
+
+ const VkRenderingInfo vk_render = {
+ .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
+ .renderArea = render->area,
+ .layerCount = render->layer_count,
+ .viewMask = render->view_mask,
+ .colorAttachmentCount = render->color_att_count,
+ .pColorAttachments = vk_color_att,
+ .pDepthAttachment = &vk_depth_att,
+ .pStencilAttachment = &vk_stencil_att,
+ };
+
+ /* Clean up previous encoder */
+ kk_encoder_signal_fence_and_end(cmd);
+ mtl_release(cmd->state.gfx.render_pass_descriptor);
+ cmd->state.gfx.render_pass_descriptor = NULL;
+
+ if (render->flags & VK_RENDERING_SUSPENDING_BIT)
+ need_resolve = false;
+
+ memset(render, 0, sizeof(*render));
+
+ if (need_resolve) {
+ kk_meta_resolve_rendering(cmd, &vk_render);
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdBindIndexBuffer2KHR(VkCommandBuffer commandBuffer, VkBuffer _buffer,
+ VkDeviceSize offset, VkDeviceSize size,
+ VkIndexType indexType)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_buffer, buffer, _buffer);
+
+ cmd->state.gfx.index.handle = buffer->mtl_handle;
+ cmd->state.gfx.index.size = size;
+ cmd->state.gfx.index.offset = offset;
+ cmd->state.gfx.index.bytes_per_index = vk_index_type_to_bytes(indexType);
+ cmd->state.gfx.index.restart = vk_index_to_restart(indexType);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding,
+ uint32_t bindingCount, const VkBuffer *pBuffers,
+ const VkDeviceSize *pOffsets,
+ const VkDeviceSize *pSizes,
+ const VkDeviceSize *pStrides)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+
+ if (pStrides) {
+ vk_cmd_set_vertex_binding_strides(&cmd->vk, firstBinding, bindingCount,
+ pStrides);
+ }
+
+ for (uint32_t i = 0; i < bindingCount; i++) {
+ VK_FROM_HANDLE(kk_buffer, buffer, pBuffers[i]);
+ uint32_t idx = firstBinding + i;
+ uint64_t size = pSizes ? pSizes[i] : VK_WHOLE_SIZE;
+ const struct kk_addr_range addr_range =
+ kk_buffer_addr_range(buffer, pOffsets[i], size);
+ cmd->state.gfx.vb.addr_range[idx] = addr_range;
+ cmd->state.gfx.vb.handles[idx] = buffer->mtl_handle;
+ cmd->state.gfx.dirty |= KK_DIRTY_VB;
+ }
+}
+
+static void
+kk_flush_vp_state(struct kk_cmd_buffer *cmd)
+{
+ const struct vk_dynamic_graphics_state *dyn =
+ &cmd->vk.dynamic_graphics_state;
+
+ /* We always need at least 1 viewport for the hardware. With rasterizer
+ * discard the app may not supply any, but we can just program garbage.
+ */
+ unsigned count = MAX2(dyn->vp.scissor_count, 1);
+
+ /* Need to clamp scissor rectangles to render area, otherwise Metal doesn't
+ * like it */
+ struct mtl_scissor_rect rects[KK_MAX_VIEWPORTS] = {0};
+ VkOffset2D origin = cmd->state.gfx.render.area.offset;
+ VkOffset2D end = {.x = origin.x + cmd->state.gfx.render.area.extent.width,
+ .y = origin.y + cmd->state.gfx.render.area.extent.height};
+ for (uint32_t i = 0; i < dyn->vp.scissor_count; i++) {
+ const VkRect2D *rect = &dyn->vp.scissors[i];
+
+ size_t x0 = CLAMP(rect->offset.x, origin.x, end.x);
+ size_t x1 = CLAMP(rect->offset.x + rect->extent.width, origin.x, end.x);
+ size_t y0 = CLAMP(rect->offset.y, origin.y, end.y);
+ size_t y1 = CLAMP(rect->offset.y + rect->extent.height, origin.y, end.y);
+ size_t minx = MIN2(x0, x1);
+ size_t miny = MIN2(y0, y1);
+ size_t maxx = MAX2(x0, x1);
+ size_t maxy = MAX2(y0, y1);
+ rects[i].x = minx;
+ rects[i].y = miny;
+ rects[i].width = maxx - minx;
+ rects[i].height = maxy - miny;
+ }
+
+ mtl_set_scissor_rects(kk_render_encoder(cmd), rects, count);
+
+ count = MAX2(dyn->vp.viewport_count, 1);
+
+ struct mtl_viewport viewports[KK_MAX_VIEWPORTS] = {0};
+
+   /* Metal's and Vulkan's NDC Y axes point in opposite directions. Account for
+    * that here by flipping the viewport. */
+ for (uint32_t i = 0; i < dyn->vp.viewport_count; i++) {
+ const VkViewport *vp = &dyn->vp.viewports[i];
+
+ viewports[i].originX = vp->x;
+ viewports[i].originY = vp->y + vp->height;
+ viewports[i].width = vp->width;
+ viewports[i].height = -vp->height;
+
+ viewports[i].znear = vp->minDepth;
+ viewports[i].zfar = vp->maxDepth;
+ }
+
+ mtl_set_viewports(kk_render_encoder(cmd), viewports, count);
+}
+
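+/* Computes the robustness clamp for a vertex attribute: returns the largest
+ * vertex index that can safely be fetched from the bound range and writes the
+ * clamped base address (or the sink buffer) to vbuf_out. */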
+static inline uint32_t
+kk_calculate_vbo_clamp(uint64_t vbuf, uint64_t sink, enum pipe_format format,
+ uint32_t size_B, uint32_t stride_B, uint32_t offset_B,
+ uint64_t *vbuf_out)
+{
+ unsigned elsize_B = util_format_get_blocksize(format);
+ unsigned subtracted_B = offset_B + elsize_B;
+
+   /* If at least one index is valid, determine the maximum valid index.
+    * Otherwise, redirect reads to the sink buffer.
+    */
+ if (size_B >= subtracted_B) {
+ *vbuf_out = vbuf + offset_B;
+
+ /* If stride is zero, do not clamp, everything is valid. */
+ if (stride_B)
+ return ((size_B - subtracted_B) / stride_B);
+ else
+ return UINT32_MAX;
+ } else {
+ *vbuf_out = sink;
+ return 0;
+ }
+}
+
+static void
+set_empty_scissor(mtl_render_encoder *enc)
+{
+ struct mtl_scissor_rect rect = {.x = 0u, .y = 0u, .width = 0u, .height = 0u};
+ mtl_set_scissor_rects(enc, &rect, 1);
+}
+
+/* TODO_KOSMICKRISP: Move to common */
+static inline enum mesa_prim
+vk_conv_topology(VkPrimitiveTopology topology)
+{
+ switch (topology) {
+ case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+ return MESA_PRIM_POINTS;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+ return MESA_PRIM_LINES;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+ return MESA_PRIM_LINE_STRIP;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wswitch"
+ case VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA:
+#pragma GCC diagnostic pop
+ return MESA_PRIM_TRIANGLES;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+ return MESA_PRIM_TRIANGLE_STRIP;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+ return MESA_PRIM_TRIANGLE_FAN;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+ return MESA_PRIM_LINES_ADJACENCY;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+ return MESA_PRIM_LINE_STRIP_ADJACENCY;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+ return MESA_PRIM_TRIANGLES_ADJACENCY;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+ return MESA_PRIM_TRIANGLE_STRIP_ADJACENCY;
+ case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
+ return MESA_PRIM_PATCHES;
+ default:
+ UNREACHABLE("invalid");
+ }
+}
+
+static void
+kk_flush_draw_state(struct kk_cmd_buffer *cmd)
+{
+ struct kk_device *device = kk_cmd_buffer_device(cmd);
+ struct kk_graphics_state *gfx = &cmd->state.gfx;
+ struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state;
+ struct kk_descriptor_state *desc = &cmd->state.gfx.descriptors;
+ mtl_render_encoder *enc = kk_render_encoder(cmd);
+
+ if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDING_STRIDES)) {
+ u_foreach_bit(ndx, dyn->vi->bindings_valid) {
+ desc->root.draw.buffer_strides[ndx] = dyn->vi_binding_strides[ndx];
+ }
+ desc->root_dirty = true;
+ }
+
+ if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE)) {
+ if (dyn->rs.rasterizer_discard_enable) {
+ set_empty_scissor(enc);
+ } else {
+ /* Enforce setting the correct scissors */
+ BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT);
+ }
+ }
+
+ if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE)) {
+ gfx->is_cull_front_and_back =
+ dyn->rs.cull_mode == VK_CULL_MODE_FRONT_AND_BACK;
+ if (gfx->is_cull_front_and_back) {
+ set_empty_scissor(enc);
+ } else {
+ mtl_set_cull_mode(enc,
+ vk_front_face_to_mtl_cull_mode(dyn->rs.cull_mode));
+ /* Enforce setting the correct scissors */
+ BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT);
+ }
+ }
+
+ if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY)) {
+ gfx->primitive_type = vk_primitive_topology_to_mtl_primitive_type(
+ dyn->ia.primitive_topology);
+ gfx->prim = vk_conv_topology(dyn->ia.primitive_topology);
+ }
+
+ if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
+ gfx->restart_disabled = !dyn->ia.primitive_restart_enable;
+ }
+
+ /* We enable raster discard by setting scissor to size (0, 0) */
+ if (!(dyn->rs.rasterizer_discard_enable || gfx->is_cull_front_and_back) &&
+ (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT) ||
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS) ||
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSOR_COUNT) ||
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS)))
+ kk_flush_vp_state(cmd);
+
+ if (cmd->state.gfx.is_depth_stencil_dynamic &&
+ (cmd->state.gfx.render.depth_att.vk_format != VK_FORMAT_UNDEFINED ||
+ cmd->state.gfx.render.stencil_att.vk_format != VK_FORMAT_UNDEFINED) &&
+ (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) |
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) |
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) |
+ // BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE)
+ // | BITSET_TEST(dyn->dirty,
+ // MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS) |
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) |
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) |
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) |
+ BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK))) {
+ kk_cmd_release_dynamic_ds_state(cmd);
+
+ bool has_depth = dyn->rp.attachments & MESA_VK_RP_ATTACHMENT_DEPTH_BIT;
+ bool has_stencil =
+ dyn->rp.attachments & MESA_VK_RP_ATTACHMENT_STENCIL_BIT;
+ gfx->depth_stencil_state = kk_compile_depth_stencil_state(
+ device, &dyn->ds, has_depth, has_stencil);
+ mtl_set_depth_stencil_state(enc, gfx->depth_stencil_state);
+ }
+
+ if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE)) {
+ mtl_set_front_face_winding(
+ enc, vk_front_face_to_mtl_winding(
+ cmd->vk.dynamic_graphics_state.rs.front_face));
+ }
+
+ if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS)) {
+ mtl_set_depth_bias(enc, dyn->rs.depth_bias.constant_factor,
+ dyn->rs.depth_bias.slope_factor,
+ dyn->rs.depth_bias.clamp);
+ }
+
+ if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE)) {
+ enum mtl_depth_clip_mode mode = dyn->rs.depth_clamp_enable
+ ? MTL_DEPTH_CLIP_MODE_CLAMP
+ : MTL_DEPTH_CLIP_MODE_CLIP;
+ mtl_set_depth_clip_mode(enc, mode);
+ }
+
+ if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE))
+ mtl_set_stencil_references(
+ enc, cmd->vk.dynamic_graphics_state.ds.stencil.front.reference,
+ cmd->vk.dynamic_graphics_state.ds.stencil.back.reference);
+
+ if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
+ static_assert(sizeof(desc->root.draw.blend_constant) ==
+ sizeof(dyn->cb.blend_constants),
+ "common size");
+
+ memcpy(desc->root.draw.blend_constant, dyn->cb.blend_constants,
+ sizeof(dyn->cb.blend_constants));
+ desc->root_dirty = true;
+ }
+
+ if (gfx->dirty & KK_DIRTY_VB) {
+ unsigned slot = 0;
+ cmd->state.gfx.vb.max_vertices = 0u;
+ u_foreach_bit(i, cmd->state.gfx.vb.attribs_read) {
+ if (dyn->vi->attributes_valid & BITFIELD_BIT(i)) {
+ struct vk_vertex_attribute_state attr = dyn->vi->attributes[i];
+ struct kk_addr_range vb = gfx->vb.addr_range[attr.binding];
+
+ mtl_render_use_resource(enc, gfx->vb.handles[attr.binding],
+ MTL_RESOURCE_USAGE_READ);
+ desc->root.draw.attrib_clamps[slot] = kk_calculate_vbo_clamp(
+ vb.addr, 0, vk_format_to_pipe_format(attr.format), vb.range,
+ dyn->vi_binding_strides[attr.binding], attr.offset,
+ &desc->root.draw.attrib_base[slot]);
+ desc->root.draw.buffer_strides[attr.binding] =
+ dyn->vi_binding_strides[attr.binding];
+
+ cmd->state.gfx.vb.max_vertices =
+ MAX2(vb.range / dyn->vi_binding_strides[attr.binding],
+ cmd->state.gfx.vb.max_vertices);
+ }
+ slot++;
+ }
+ desc->root_dirty = true;
+ }
+
+ if (gfx->dirty & KK_DIRTY_PIPELINE) {
+ mtl_render_set_pipeline_state(enc, gfx->pipeline_state);
+ if (gfx->depth_stencil_state)
+ mtl_set_depth_stencil_state(enc, gfx->depth_stencil_state);
+ }
+
+ if (desc->push_dirty)
+ kk_cmd_buffer_flush_push_descriptors(cmd, desc);
+   /* This must happen after the push descriptors' buffers are created;
+    * otherwise the buffer backing them would not exist yet and could not be
+    * made resident. */
+ if (desc->sets_not_resident)
+ kk_make_descriptor_resources_resident(cmd,
+ VK_PIPELINE_BIND_POINT_GRAPHICS);
+ if (desc->root_dirty)
+ kk_upload_descriptor_root(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS);
+
+ /* Make user allocated heaps resident */
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+ simple_mtx_lock(&dev->user_heap_cache.mutex);
+ if (cmd->encoder->main.user_heap_hash != dev->user_heap_cache.hash) {
+ cmd->encoder->main.user_heap_hash = dev->user_heap_cache.hash;
+ mtl_heap **heaps = util_dynarray_begin(&dev->user_heap_cache.handles);
+ uint32_t count =
+ util_dynarray_num_elements(&dev->user_heap_cache.handles, mtl_heap *);
+ mtl_render_use_heaps(enc, heaps, count);
+ }
+ simple_mtx_unlock(&dev->user_heap_cache.mutex);
+
+ struct kk_bo *root_buffer = desc->root.root_buffer;
+ if (root_buffer) {
+ mtl_set_vertex_buffer(enc, root_buffer->map, 0, 0);
+ mtl_set_fragment_buffer(enc, root_buffer->map, 0, 0);
+ }
+
+ if (gfx->dirty & KK_DIRTY_OCCLUSION) {
+ mtl_set_visibility_result_mode(enc, gfx->occlusion.mode,
+ gfx->occlusion.index * sizeof(uint64_t));
+ }
+
+ gfx->dirty = 0u;
+ vk_dynamic_graphics_state_clear_dirty(dyn);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount,
+ uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+
+ kk_flush_draw_state(cmd);
+
+ /* Metal does not support triangle fans */
+ bool requires_unroll = cmd->state.gfx.prim == MESA_PRIM_TRIANGLE_FAN;
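+   /* The unroll goes through the indirect triangle-fan path, so the direct
+    * draw parameters are uploaded as an indirect command. */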
+ if (requires_unroll) {
+ VkDrawIndirectCommand draw = {
+ .vertexCount = vertexCount,
+ .instanceCount = instanceCount,
+ .firstVertex = firstVertex,
+ .firstInstance = firstInstance,
+ };
+ struct kk_pool pool = kk_pool_upload(cmd, &draw, sizeof(draw), 4u);
+ kk_encoder_render_triangle_fan_indirect(cmd, pool.handle, 0u);
+ } else {
+ mtl_render_encoder *enc = kk_render_encoder(cmd);
+ mtl_draw_primitives(enc, cmd->state.gfx.primitive_type, firstVertex,
+ vertexCount, instanceCount, firstInstance);
+ }
+}
+
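+/* Metal treats the all-ones index value as the primitive restart marker and
+ * offers no way to disable it. For restart-capable topologies drawn with
+ * restart disabled and 16-bit indices, we assume the indirect path widens the
+ * indices to 32 bits so that 0xFFFF is no longer interpreted as restart. */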
+static bool
+requires_increasing_index_el_size(struct kk_cmd_buffer *cmd)
+{
+ enum mesa_prim prim = cmd->state.gfx.prim;
+ switch (prim) {
+ case MESA_PRIM_LINE_STRIP:
+ case MESA_PRIM_TRIANGLE_STRIP:
+ case MESA_PRIM_TRIANGLE_FAN:
+ return (cmd->state.gfx.restart_disabled &&
+ cmd->state.gfx.index.bytes_per_index < sizeof(uint32_t));
+ default:
+ return false;
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount,
+ uint32_t instanceCount, uint32_t firstIndex,
+ int32_t vertexOffset, uint32_t firstInstance)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+
+ kk_flush_draw_state(cmd);
+
+ /* Metal does not support triangle fans */
+ bool requires_triangle_fan_unroll =
+ cmd->state.gfx.prim == MESA_PRIM_TRIANGLE_FAN;
+
+ /* Metal does not support disabling primitive restart. We need to create a
+ * new index buffer for primitives that allow restart (line strip, triangle
+ * strip and triangle fan). Never ever support
+ * VK_EXT_primitive_topology_list_restart since it'll just add overhead */
+ bool increase_index_el_size = requires_increasing_index_el_size(cmd);
+ if (requires_triangle_fan_unroll || increase_index_el_size) {
+ VkDrawIndexedIndirectCommand draw = {
+ .indexCount = indexCount,
+ .instanceCount = instanceCount,
+ .firstIndex = firstIndex,
+ .vertexOffset = vertexOffset,
+ .firstInstance = firstInstance,
+ };
+ struct kk_pool pool = kk_pool_upload(cmd, &draw, sizeof(draw), 4u);
+ kk_encoder_render_triangle_fan_indexed_indirect(cmd, pool.handle, 0u,
+ increase_index_el_size);
+ } else {
+ uint32_t bytes_per_index = cmd->state.gfx.index.bytes_per_index;
+ enum mtl_index_type index_type =
+ index_size_in_bytes_to_mtl_index_type(bytes_per_index);
+ uint32_t index_buffer_offset_B =
+ firstIndex * bytes_per_index + cmd->state.gfx.index.offset;
+
+ mtl_render_encoder *enc = kk_render_encoder(cmd);
+ mtl_draw_indexed_primitives(
+ enc, cmd->state.gfx.primitive_type, indexCount, index_type,
+ cmd->state.gfx.index.handle, index_buffer_offset_B, instanceCount,
+ vertexOffset, firstInstance);
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer,
+ VkDeviceSize offset, uint32_t drawCount, uint32_t stride)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_buffer, buffer, _buffer);
+
+ kk_flush_draw_state(cmd);
+ mtl_render_encoder *enc = kk_render_encoder(cmd);
+
+ /* Metal does not support triangle fans */
+ bool requires_unroll = cmd->state.gfx.prim == MESA_PRIM_TRIANGLE_FAN;
+ for (uint32_t i = 0u; i < drawCount; ++i, offset += stride) {
+ if (requires_unroll) {
+ kk_encoder_render_triangle_fan_indirect(cmd, buffer->mtl_handle,
+ offset);
+ } else {
+ mtl_draw_primitives_indirect(enc, cmd->state.gfx.primitive_type,
+ buffer->mtl_handle, offset);
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer,
+ VkDeviceSize offset, VkBuffer countBuffer,
+ VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
+ uint32_t stride)
+{
+ /* TODO_KOSMICKRISP */
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer,
+ VkDeviceSize offset, uint32_t drawCount,
+ uint32_t stride)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_buffer, buffer, _buffer);
+
+ kk_flush_draw_state(cmd);
+
+ /* Metal does not support triangle fans */
+ bool requires_triangle_fan_unroll =
+ cmd->state.gfx.prim == MESA_PRIM_TRIANGLE_FAN;
+
+ /* Metal does not support disabling primitive restart. We need to create a
+ * new index buffer for primitives that allow restart (line strip, triangle
+ * strip and triangle fan). Never ever support
+ * VK_EXT_primitive_topology_list_restart since it'll just add overhead */
+ bool increase_index_el_size = requires_increasing_index_el_size(cmd);
+ for (uint32_t i = 0u; i < drawCount; ++i, offset += stride) {
+ if (requires_triangle_fan_unroll || increase_index_el_size) {
+ kk_encoder_render_triangle_fan_indexed_indirect(
+ cmd, buffer->mtl_handle, offset, increase_index_el_size);
+ } else {
+ uint32_t bytes_per_index = cmd->state.gfx.index.bytes_per_index;
+ enum mtl_index_type index_type =
+ index_size_in_bytes_to_mtl_index_type(bytes_per_index);
+ uint32_t index_buffer_offset = cmd->state.gfx.index.offset;
+
+ mtl_render_encoder *enc = kk_render_encoder(cmd);
+ mtl_draw_indexed_primitives_indirect(
+ enc, cmd->state.gfx.primitive_type, index_type,
+ cmd->state.gfx.index.handle, index_buffer_offset,
+ buffer->mtl_handle, offset);
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer,
+ VkDeviceSize offset, VkBuffer countBuffer,
+ VkDeviceSize countBufferOffset,
+ uint32_t maxDrawCount, uint32_t stride)
+{
+ /* TODO_KOSMICKRISP */
+}
diff --git a/src/kosmickrisp/vulkan/kk_cmd_meta.c b/src/kosmickrisp/vulkan/kk_cmd_meta.c
new file mode 100644
index 00000000000..debf9cd467c
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_cmd_meta.c
@@ -0,0 +1,318 @@
+/*
+ * Copyright 2024 Valve Corporation
+ * Copyright 2024 Alyssa Rosenzweig
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_private.h"
+
+#include "kk_buffer.h"
+#include "kk_cmd_buffer.h"
+#include "kk_encoder.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+
+#include "kk_entrypoints.h"
+
+static VkResult
+kk_cmd_bind_map_buffer(struct vk_command_buffer *vk_cmd,
+ struct vk_meta_device *meta, VkBuffer _buffer,
+ void **map_out)
+{
+ struct kk_cmd_buffer *cmd = container_of(vk_cmd, struct kk_cmd_buffer, vk);
+ VK_FROM_HANDLE(kk_buffer, buffer, _buffer);
+
+ assert(buffer->vk.size < UINT_MAX);
+ struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, buffer->vk.size, 16u);
+ if (unlikely(bo == NULL))
+ return VK_ERROR_OUT_OF_POOL_MEMORY;
+
+ /* Need to retain since VkBuffers release the mtl_handle too */
+ mtl_retain(bo->map);
+ buffer->mtl_handle = bo->map;
+ buffer->vk.device_address = bo->gpu;
+ *map_out = bo->cpu;
+ mtl_compute_use_resource(cmd->encoder->main.encoder, buffer->mtl_handle,
+ MTL_RESOURCE_USAGE_WRITE | MTL_RESOURCE_USAGE_READ);
+ return VK_SUCCESS;
+}
+
+VkResult
+kk_device_init_meta(struct kk_device *dev)
+{
+ VkResult result = vk_meta_device_init(&dev->vk, &dev->meta);
+ if (result != VK_SUCCESS)
+ return result;
+
+ dev->meta.use_gs_for_layer = false;
+ dev->meta.use_stencil_export = true;
+ dev->meta.use_rect_list_pipeline = true;
+ dev->meta.cmd_bind_map_buffer = kk_cmd_bind_map_buffer;
+ dev->meta.max_bind_map_buffer_size_B = 64 * 1024;
+
+ for (unsigned i = 0; i < VK_META_BUFFER_CHUNK_SIZE_COUNT; ++i) {
+ dev->meta.buffer_access.optimal_wg_size[i] = 64;
+ }
+
+ return VK_SUCCESS;
+}
+
+void
+kk_device_finish_meta(struct kk_device *dev)
+{
+ vk_meta_device_finish(&dev->vk, &dev->meta);
+}
+
+struct kk_meta_save {
+ struct vk_vertex_input_state _dynamic_vi;
+ struct vk_sample_locations_state _dynamic_sl;
+ struct vk_dynamic_graphics_state dynamic;
+ struct {
+ union {
+ struct {
+ mtl_render_pipeline_state *ps;
+ mtl_depth_stencil_state *ds;
+ uint32_t attribs_read;
+ enum mtl_primitive_type primitive_type;
+ enum mtl_visibility_result_mode occlusion;
+ bool is_ds_dynamic;
+ } gfx;
+ struct {
+ mtl_compute_pipeline_state *pipeline_state;
+ struct mtl_size local_size;
+ } cs;
+ };
+ } pipeline;
+ struct kk_descriptor_set *desc0;
+ struct kk_push_descriptor_set *push_desc0;
+ mtl_buffer *vb0_handle;
+ struct kk_addr_range vb0;
+ struct kk_buffer_address desc0_set_addr;
+ bool has_push_desc0;
+ uint8_t push[KK_MAX_PUSH_SIZE];
+};
+
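+/* Saves the command buffer state that vk_meta operations may clobber so it
+ * can be restored in kk_meta_end(). */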
+static void
+kk_meta_begin(struct kk_cmd_buffer *cmd, struct kk_meta_save *save,
+ VkPipelineBindPoint bind_point)
+{
+ struct kk_descriptor_state *desc = kk_get_descriptors_state(cmd, bind_point);
+
+ if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
+ save->dynamic = cmd->vk.dynamic_graphics_state;
+ save->_dynamic_vi = cmd->state.gfx._dynamic_vi;
+ save->_dynamic_sl = cmd->state.gfx._dynamic_sl;
+ save->pipeline.gfx.ps = cmd->state.gfx.pipeline_state;
+ save->pipeline.gfx.ds = cmd->state.gfx.depth_stencil_state;
+ save->pipeline.gfx.attribs_read = cmd->state.gfx.vb.attribs_read;
+ save->pipeline.gfx.primitive_type = cmd->state.gfx.primitive_type;
+ save->pipeline.gfx.occlusion = cmd->state.gfx.occlusion.mode;
+ save->pipeline.gfx.is_ds_dynamic =
+ cmd->state.gfx.is_depth_stencil_dynamic;
+
+ cmd->state.gfx.is_depth_stencil_dynamic = false;
+ cmd->state.gfx.depth_stencil_state = NULL;
+ cmd->state.gfx.occlusion.mode = MTL_VISIBILITY_RESULT_MODE_DISABLED;
+ cmd->state.gfx.dirty |= KK_DIRTY_OCCLUSION;
+ desc->root_dirty = true;
+ } else {
+ save->pipeline.cs.pipeline_state = cmd->state.cs.pipeline_state;
+ save->pipeline.cs.local_size = cmd->state.cs.local_size;
+ }
+
+ save->vb0_handle = cmd->state.gfx.vb.handles[0];
+ save->vb0 = cmd->state.gfx.vb.addr_range[0];
+
+ save->desc0 = desc->sets[0];
+ save->has_push_desc0 = desc->push[0];
+ if (save->has_push_desc0)
+ save->push_desc0 = desc->push[0];
+
+ static_assert(sizeof(save->push) == sizeof(desc->root.push),
+ "Size mismatch for push in meta_save");
+ memcpy(save->push, desc->root.push, sizeof(save->push));
+}
+
+static void
+kk_meta_end(struct kk_cmd_buffer *cmd, struct kk_meta_save *save,
+ VkPipelineBindPoint bind_point)
+{
+ struct kk_descriptor_state *desc = kk_get_descriptors_state(cmd, bind_point);
+ desc->root_dirty = true;
+
+ if (save->desc0) {
+ desc->sets[0] = save->desc0;
+ desc->root.sets[0] = save->desc0->addr;
+ desc->set_sizes[0] = save->desc0->size;
+ desc->sets_not_resident |= BITFIELD_BIT(0);
+ desc->push_dirty &= ~BITFIELD_BIT(0);
+ } else if (save->has_push_desc0) {
+ desc->push[0] = save->push_desc0;
+ desc->sets_not_resident |= BITFIELD_BIT(0);
+ desc->push_dirty |= BITFIELD_BIT(0);
+ }
+
+ if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
+ /* Restore the dynamic state */
+ assert(save->dynamic.vi == &cmd->state.gfx._dynamic_vi);
+ assert(save->dynamic.ms.sample_locations == &cmd->state.gfx._dynamic_sl);
+ cmd->vk.dynamic_graphics_state = save->dynamic;
+ cmd->state.gfx._dynamic_vi = save->_dynamic_vi;
+ cmd->state.gfx._dynamic_sl = save->_dynamic_sl;
+ memcpy(cmd->vk.dynamic_graphics_state.dirty,
+ cmd->vk.dynamic_graphics_state.set,
+ sizeof(cmd->vk.dynamic_graphics_state.set));
+
+ if (cmd->state.gfx.is_depth_stencil_dynamic)
+ mtl_release(cmd->state.gfx.depth_stencil_state);
+ cmd->state.gfx.pipeline_state = save->pipeline.gfx.ps;
+ cmd->state.gfx.depth_stencil_state = save->pipeline.gfx.ds;
+ cmd->state.gfx.primitive_type = save->pipeline.gfx.primitive_type;
+ cmd->state.gfx.vb.attribs_read = save->pipeline.gfx.attribs_read;
+ cmd->state.gfx.is_depth_stencil_dynamic =
+ save->pipeline.gfx.is_ds_dynamic;
+ cmd->state.gfx.dirty |= KK_DIRTY_PIPELINE;
+
+ cmd->state.gfx.vb.addr_range[0] = save->vb0;
+ cmd->state.gfx.vb.handles[0] = save->vb0_handle;
+ cmd->state.gfx.dirty |= KK_DIRTY_VB;
+
+ cmd->state.gfx.occlusion.mode = save->pipeline.gfx.occlusion;
+ cmd->state.gfx.dirty |= KK_DIRTY_OCCLUSION;
+
+ desc->root_dirty = true;
+ } else {
+ cmd->state.cs.local_size = save->pipeline.cs.local_size;
+ cmd->state.cs.pipeline_state = save->pipeline.cs.pipeline_state;
+ }
+
+ memcpy(desc->root.push, save->push, sizeof(save->push));
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer,
+ VkDeviceSize dstOffset, VkDeviceSize dstRange, uint32_t data)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_buffer, buf, dstBuffer);
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+
+ struct kk_meta_save save;
+ kk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
+ mtl_compute_use_resource(kk_compute_encoder(cmd), buf->mtl_handle,
+ MTL_RESOURCE_USAGE_WRITE);
+ vk_meta_fill_buffer(&cmd->vk, &dev->meta, dstBuffer, dstOffset, dstRange,
+ data);
+ kk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer,
+ VkDeviceSize dstOffset, VkDeviceSize dstRange,
+ const void *pData)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_buffer, buf, dstBuffer);
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+
+ struct kk_meta_save save;
+ kk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
+ mtl_compute_use_resource(kk_compute_encoder(cmd), buf->mtl_handle,
+ MTL_RESOURCE_USAGE_WRITE);
+ vk_meta_update_buffer(&cmd->vk, &dev->meta, dstBuffer, dstOffset, dstRange,
+ pData);
+ kk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdBlitImage2(VkCommandBuffer commandBuffer,
+ const VkBlitImageInfo2 *pBlitImageInfo)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+
+ struct kk_meta_save save;
+ kk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
+ vk_meta_blit_image2(&cmd->vk, &dev->meta, pBlitImageInfo);
+ kk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdResolveImage2(VkCommandBuffer commandBuffer,
+ const VkResolveImageInfo2 *pResolveImageInfo)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+
+ struct kk_meta_save save;
+ kk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
+ vk_meta_resolve_image2(&cmd->vk, &dev->meta, pResolveImageInfo);
+ kk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
+}
+
+static void
+kk_meta_init_render(struct kk_cmd_buffer *cmd,
+ struct vk_meta_rendering_info *info)
+{
+ const struct kk_rendering_state *render = &cmd->state.gfx.render;
+
+ *info = (struct vk_meta_rendering_info){
+ .samples = MAX2(render->samples, 1),
+ .view_mask = render->view_mask,
+ .color_attachment_count = render->color_att_count,
+ .depth_attachment_format = render->depth_att.vk_format,
+ .stencil_attachment_format = render->stencil_att.vk_format,
+ };
+ for (uint32_t a = 0; a < render->color_att_count; a++) {
+ info->color_attachment_formats[a] = render->color_att[a].vk_format;
+ info->color_attachment_write_masks[a] =
+ VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount,
+ const VkClearAttachment *pAttachments,
+ uint32_t rectCount, const VkClearRect *pRects)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+
+ struct vk_meta_rendering_info render_info;
+ kk_meta_init_render(cmd, &render_info);
+
+ uint32_t view_mask = cmd->state.gfx.render.view_mask;
+ struct kk_encoder *encoder = cmd->encoder;
+ uint32_t layer_ids[KK_MAX_MULTIVIEW_VIEW_COUNT] = {};
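+   /* Drop vertex amplification to a single view for the meta clear; the
+    * per-view amplification derived from view_mask is restored below. */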
+ mtl_set_vertex_amplification_count(encoder->main.encoder, layer_ids, 1u);
+
+ struct kk_meta_save save;
+ kk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
+ vk_meta_clear_attachments(&cmd->vk, &dev->meta, &render_info,
+ attachmentCount, pAttachments, rectCount, pRects);
+ kk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
+
+ uint32_t count = 0u;
+ u_foreach_bit(id, view_mask)
+ layer_ids[count++] = id;
+ if (view_mask == 0u) {
+ layer_ids[count++] = 0;
+ }
+ mtl_set_vertex_amplification_count(encoder->main.encoder, layer_ids, count);
+}
+
+void
+kk_meta_resolve_rendering(struct kk_cmd_buffer *cmd,
+ const VkRenderingInfo *pRenderingInfo)
+{
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+
+ struct kk_meta_save save;
+ kk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
+ vk_meta_resolve_rendering(&cmd->vk, &dev->meta, pRenderingInfo);
+ kk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS);
+}
diff --git a/src/kosmickrisp/vulkan/kk_cmd_pool.c b/src/kosmickrisp/vulkan/kk_cmd_pool.c
new file mode 100644
index 00000000000..15e571c9723
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_cmd_pool.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_cmd_pool.h"
+
+#include "kk_device.h"
+#include "kk_entrypoints.h"
+#include "kk_physical_device.h"
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_CreateCommandPool(VkDevice _device,
+ const VkCommandPoolCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkCommandPool *pCmdPool)
+{
+ VK_FROM_HANDLE(kk_device, device, _device);
+ struct kk_cmd_pool *pool;
+
+ pool = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pool), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (pool == NULL)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ VkResult result =
+ vk_command_pool_init(&device->vk, &pool->vk, pCreateInfo, pAllocator);
+ if (result != VK_SUCCESS) {
+ vk_free2(&device->vk.alloc, pAllocator, pool);
+ return result;
+ }
+
+ list_inithead(&pool->free_mem);
+ list_inithead(&pool->free_gart_mem);
+
+ *pCmdPool = kk_cmd_pool_to_handle(pool);
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_DestroyCommandPool(VkDevice _device, VkCommandPool commandPool,
+ const VkAllocationCallbacks *pAllocator)
+{
+ VK_FROM_HANDLE(kk_device, device, _device);
+ VK_FROM_HANDLE(kk_cmd_pool, pool, commandPool);
+
+ if (!pool)
+ return;
+
+ vk_command_pool_finish(&pool->vk);
+ vk_free2(&device->vk.alloc, pAllocator, pool);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_TrimCommandPool(VkDevice device, VkCommandPool commandPool,
+ VkCommandPoolTrimFlags flags)
+{
+ VK_FROM_HANDLE(kk_cmd_pool, pool, commandPool);
+
+ vk_command_pool_trim(&pool->vk, flags);
+}
diff --git a/src/kosmickrisp/vulkan/kk_cmd_pool.h b/src/kosmickrisp/vulkan/kk_cmd_pool.h
new file mode 100644
index 00000000000..c3a58f78524
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_cmd_pool.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_CMD_POOL_H
+#define KK_CMD_POOL_H
+
+#include "kk_private.h"
+
+#include "vk_command_pool.h"
+
+struct kk_cmd_pool {
+ struct vk_command_pool vk;
+
+ /** Lists of free command-buffer memory (regular and GART) */
+ struct list_head free_mem;
+ struct list_head free_gart_mem;
+};
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(kk_cmd_pool, vk.base, VkCommandPool,
+ VK_OBJECT_TYPE_COMMAND_POOL)
+
+static inline struct kk_device *
+kk_cmd_pool_device(struct kk_cmd_pool *pool)
+{
+ return (struct kk_device *)pool->vk.base.device;
+}
+
+#endif /* KK_CMD_POOL_H */
diff --git a/src/kosmickrisp/vulkan/kk_debug.c b/src/kosmickrisp/vulkan/kk_debug.c
new file mode 100644
index 00000000000..dfc0385be41
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_debug.c
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * SPDX-License-Identifier: MIT
+ */
+#include "kk_debug.h"
+#include "util/u_debug.h"
+
+enum kk_debug kk_mesa_debug_flags = 0;
+
+static const struct debug_named_value flags[] = {
+ {"nir", KK_DEBUG_NIR},
+ {"msl", KK_DEBUG_MSL},
+ {NULL, 0},
+};
+
+DEBUG_GET_ONCE_FLAGS_OPTION(mesa_kk_debug, "MESA_KK_DEBUG", flags, 0);
+
+void
+kk_process_debug_variable(void)
+{
+ kk_mesa_debug_flags = debug_get_option_mesa_kk_debug();
+}
diff --git a/src/kosmickrisp/vulkan/kk_debug.h b/src/kosmickrisp/vulkan/kk_debug.h
new file mode 100644
index 00000000000..e50b3098c30
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_debug.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef KK_DEBUG_H
+#define KK_DEBUG_H 1
+
+enum kk_debug {
+ /* Print out the NIR from the compiler */
+ KK_DEBUG_NIR = 1ull << 0,
+ /* Print out the generated MSL source code from the compiler */
+ KK_DEBUG_MSL = 1ull << 1,
+};
+
+extern enum kk_debug kk_mesa_debug_flags;
+
+#define KK_DEBUG(flag) unlikely(kk_mesa_debug_flags & KK_DEBUG_##flag)
+
+extern void kk_process_debug_variable(void);
+
+#endif /* KK_DEBUG_H */
diff --git a/src/kosmickrisp/vulkan/kk_descriptor_set.c b/src/kosmickrisp/vulkan/kk_descriptor_set.c
new file mode 100644
index 00000000000..0637fddc812
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_descriptor_set.c
@@ -0,0 +1,806 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_descriptor_set.h"
+
+#include "kk_bo.h"
+#include "kk_buffer.h"
+#include "kk_buffer_view.h"
+#include "kk_descriptor_set_layout.h"
+#include "kk_device.h"
+#include "kk_entrypoints.h"
+#include "kk_image_view.h"
+#include "kk_physical_device.h"
+#include "kk_sampler.h"
+
+#include "util/format/u_format.h"
+
+static inline uint32_t
+align_u32(uint32_t v, uint32_t a)
+{
+ assert(a != 0 && a == (a & -a));
+ return (v + a - 1) & ~(a - 1);
+}
+
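+/* Returns a CPU pointer to the descriptor slot for (binding, elem) within the
+ * set's mapped descriptor buffer and, optionally, how many bytes remain from
+ * that offset to the end of the set.
+ */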
+static inline void *
+desc_ubo_data(struct kk_descriptor_set *set, uint32_t binding, uint32_t elem,
+ uint32_t *size_out)
+{
+ const struct kk_descriptor_set_binding_layout *binding_layout =
+ &set->layout->binding[binding];
+
+ uint32_t offset = binding_layout->offset + elem * binding_layout->stride;
+ assert(offset < set->size);
+
+ if (size_out != NULL)
+ *size_out = set->size - offset;
+
+ return (char *)set->mapped_ptr + offset;
+}
+
+static void
+write_desc(struct kk_descriptor_set *set, uint32_t binding, uint32_t elem,
+ const void *desc_data, size_t desc_size)
+{
+ ASSERTED uint32_t dst_size;
+ void *dst = desc_ubo_data(set, binding, elem, &dst_size);
+ assert(desc_size <= dst_size);
+ memcpy(dst, desc_data, desc_size);
+}
+
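+/* Builds the kk_sampled_image_descriptor payload for an image and/or sampler.
+ * YCbCr views and samplers may span up to 3 planes; one descriptor is written
+ * per plane.
+ */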
+static void
+get_sampled_image_view_desc(VkDescriptorType descriptor_type,
+ const VkDescriptorImageInfo *const info, void *dst,
+ size_t dst_size, bool is_input_attachment)
+{
+ struct kk_sampled_image_descriptor desc[3] = {};
+ uint8_t plane_count = 1;
+
+ if (descriptor_type != VK_DESCRIPTOR_TYPE_SAMPLER && info &&
+ info->imageView != VK_NULL_HANDLE) {
+ VK_FROM_HANDLE(kk_image_view, view, info->imageView);
+
+ plane_count = view->plane_count;
+ for (uint8_t plane = 0; plane < plane_count; plane++) {
+ if (is_input_attachment) {
+ assert(view->planes[plane].input_gpu_resource_id);
+ desc[plane].image_gpu_resource_id =
+ view->planes[plane].input_gpu_resource_id;
+ } else {
+ assert(view->planes[plane].sampled_gpu_resource_id);
+ desc[plane].image_gpu_resource_id =
+ view->planes[plane].sampled_gpu_resource_id;
+ }
+ }
+ }
+
+ if (descriptor_type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+ descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
+ VK_FROM_HANDLE(kk_sampler, sampler, info->sampler);
+
+ plane_count = MAX2(plane_count, sampler->plane_count);
+
+ for (uint8_t plane = 0; plane < plane_count; plane++) {
+ /* We need to replicate the last sampler plane out to all image
+ * planes due to sampler table entry limitations. See
+ * kk_CreateSampler in kk_sampler.c for more details.
+ */
+ uint8_t sampler_plane = MIN2(plane, sampler->plane_count - 1u);
+ assert(sampler->planes[sampler_plane].hw->handle);
+ desc[plane].sampler_index = sampler->planes[sampler_plane].hw->index;
+ desc[plane].lod_bias_fp16 = sampler->lod_bias_fp16;
+ desc[plane].lod_min_fp16 = sampler->lod_min_fp16;
+ desc[plane].lod_max_fp16 = sampler->lod_max_fp16;
+ }
+ }
+
+ assert(sizeof(desc[0]) * plane_count <= dst_size);
+ memcpy(dst, desc, sizeof(desc[0]) * plane_count);
+}
+
+static void
+write_sampled_image_view_desc(struct kk_descriptor_set *set,
+ const VkDescriptorImageInfo *const _info,
+ uint32_t binding, uint32_t elem,
+ VkDescriptorType descriptor_type)
+{
+ VkDescriptorImageInfo info = *_info;
+
+ struct kk_descriptor_set_binding_layout *binding_layout =
+ &set->layout->binding[binding];
+ if (descriptor_type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+ descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
+ if (binding_layout->immutable_samplers != NULL) {
+ info.sampler =
+ kk_sampler_to_handle(binding_layout->immutable_samplers[elem]);
+ }
+ }
+
+ uint32_t dst_size;
+ void *dst = desc_ubo_data(set, binding, elem, &dst_size);
+ get_sampled_image_view_desc(
+ descriptor_type, &info, dst, dst_size,
+ descriptor_type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT);
+}
+
+static void
+get_storage_image_view_desc(
+ struct kk_descriptor_set_binding_layout *binding_layout,
+ const VkDescriptorImageInfo *const info, void *dst, size_t dst_size)
+{
+ struct kk_storage_image_descriptor desc = {};
+
+ if (info && info->imageView != VK_NULL_HANDLE) {
+ VK_FROM_HANDLE(kk_image_view, view, info->imageView);
+
+ /* Storage images are always single plane */
+ assert(view->plane_count == 1);
+ uint8_t plane = 0;
+
+ assert(view->planes[plane].storage_gpu_resource_id);
+ desc.image_gpu_resource_id = view->planes[plane].storage_gpu_resource_id;
+ }
+
+ assert(sizeof(desc) <= dst_size);
+ memcpy(dst, &desc, sizeof(desc));
+}
+
+static void
+write_storage_image_view_desc(struct kk_descriptor_set *set,
+ const VkDescriptorImageInfo *const info,
+ uint32_t binding, uint32_t elem)
+{
+ uint32_t dst_size;
+ void *dst = desc_ubo_data(set, binding, elem, &dst_size);
+ struct kk_descriptor_set_binding_layout *binding_layout =
+ &set->layout->binding[binding];
+ get_storage_image_view_desc(binding_layout, info, dst, dst_size);
+}
+
+static void
+write_buffer_desc(struct kk_descriptor_set *set,
+ const VkDescriptorBufferInfo *const info, uint32_t binding,
+ uint32_t elem)
+{
+ VK_FROM_HANDLE(kk_buffer, buffer, info->buffer);
+
+ const struct kk_addr_range addr_range =
+ kk_buffer_addr_range(buffer, info->offset, info->range);
+ assert(addr_range.range <= UINT32_MAX);
+
+ const struct kk_buffer_address desc = {
+ .base_addr = addr_range.addr,
+ .size = addr_range.range,
+ };
+ write_desc(set, binding, elem, &desc, sizeof(desc));
+}
+
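+/* Dynamic buffer descriptors take no space in the descriptor buffer (their
+ * stride/alignment is 0); they are kept CPU-side in
+ * kk_descriptor_set::dynamic_buffers instead.
+ */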
+static void
+write_dynamic_buffer_desc(struct kk_descriptor_set *set,
+ const VkDescriptorBufferInfo *const info,
+ uint32_t binding, uint32_t elem)
+{
+ VK_FROM_HANDLE(kk_buffer, buffer, info->buffer);
+ const struct kk_descriptor_set_binding_layout *binding_layout =
+ &set->layout->binding[binding];
+
+ const struct kk_addr_range addr_range =
+ kk_buffer_addr_range(buffer, info->offset, info->range);
+ assert(addr_range.range <= UINT32_MAX);
+
+ struct kk_buffer_address *desc =
+ &set->dynamic_buffers[binding_layout->dynamic_buffer_index + elem];
+ *desc = (struct kk_buffer_address){
+ .base_addr = addr_range.addr,
+ .size = addr_range.range,
+ };
+}
+
+static void
+write_buffer_view_desc(struct kk_descriptor_set *set,
+ const VkBufferView bufferView, uint32_t binding,
+ uint32_t elem)
+{
+ struct kk_storage_image_descriptor desc = {};
+ if (bufferView != VK_NULL_HANDLE) {
+ VK_FROM_HANDLE(kk_buffer_view, view, bufferView);
+
+ assert(view->mtl_texel_buffer_handle);
+ assert(view->texel_buffer_gpu_id);
+
+ desc.image_gpu_resource_id = view->texel_buffer_gpu_id;
+ }
+ write_desc(set, binding, elem, &desc, sizeof(desc));
+}
+
+static void
+write_inline_uniform_data(struct kk_descriptor_set *set,
+ const VkWriteDescriptorSetInlineUniformBlock *info,
+ uint32_t binding, uint32_t offset)
+{
+ assert(set->layout->binding[binding].stride == 1);
+ write_desc(set, binding, offset, info->pData, info->dataSize);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_UpdateDescriptorSets(VkDevice device, uint32_t descriptorWriteCount,
+ const VkWriteDescriptorSet *pDescriptorWrites,
+ uint32_t descriptorCopyCount,
+ const VkCopyDescriptorSet *pDescriptorCopies)
+{
+ for (uint32_t w = 0; w < descriptorWriteCount; w++) {
+ const VkWriteDescriptorSet *write = &pDescriptorWrites[w];
+ VK_FROM_HANDLE(kk_descriptor_set, set, write->dstSet);
+
+ switch (write->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ for (uint32_t j = 0; j < write->descriptorCount; j++) {
+ write_sampled_image_view_desc(
+ set, write->pImageInfo + j, write->dstBinding,
+ write->dstArrayElement + j, write->descriptorType);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ for (uint32_t j = 0; j < write->descriptorCount; j++) {
+ write_storage_image_view_desc(set, write->pImageInfo + j,
+ write->dstBinding,
+ write->dstArrayElement + j);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ for (uint32_t j = 0; j < write->descriptorCount; j++) {
+ write_buffer_view_desc(set, write->pTexelBufferView[j],
+ write->dstBinding,
+ write->dstArrayElement + j);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ for (uint32_t j = 0; j < write->descriptorCount; j++) {
+ write_buffer_desc(set, write->pBufferInfo + j, write->dstBinding,
+ write->dstArrayElement + j);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ for (uint32_t j = 0; j < write->descriptorCount; j++) {
+ write_dynamic_buffer_desc(set, write->pBufferInfo + j,
+ write->dstBinding,
+ write->dstArrayElement + j);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
+ const VkWriteDescriptorSetInlineUniformBlock *write_inline =
+ vk_find_struct_const(write->pNext,
+ WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK);
+ assert(write_inline->dataSize == write->descriptorCount);
+ write_inline_uniform_data(set, write_inline, write->dstBinding,
+ write->dstArrayElement);
+ break;
+ }
+
+ default:
+ break;
+ }
+ }
+
+ for (uint32_t i = 0; i < descriptorCopyCount; i++) {
+ const VkCopyDescriptorSet *copy = &pDescriptorCopies[i];
+ VK_FROM_HANDLE(kk_descriptor_set, src, copy->srcSet);
+ VK_FROM_HANDLE(kk_descriptor_set, dst, copy->dstSet);
+
+ const struct kk_descriptor_set_binding_layout *src_binding_layout =
+ &src->layout->binding[copy->srcBinding];
+ const struct kk_descriptor_set_binding_layout *dst_binding_layout =
+ &dst->layout->binding[copy->dstBinding];
+
+ if (dst_binding_layout->stride > 0 && src_binding_layout->stride > 0) {
+ for (uint32_t j = 0; j < copy->descriptorCount; j++) {
+ ASSERTED uint32_t dst_max_size, src_max_size;
+ void *dst_map = desc_ubo_data(
+ dst, copy->dstBinding, copy->dstArrayElement + j, &dst_max_size);
+ const void *src_map = desc_ubo_data(
+ src, copy->srcBinding, copy->srcArrayElement + j, &src_max_size);
+ const uint32_t copy_size =
+ MIN2(dst_binding_layout->stride, src_binding_layout->stride);
+ assert(copy_size <= dst_max_size && copy_size <= src_max_size);
+ memcpy(dst_map, src_map, copy_size);
+ }
+ }
+
+ switch (src_binding_layout->type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+ const uint32_t dst_dyn_start =
+ dst_binding_layout->dynamic_buffer_index + copy->dstArrayElement;
+ const uint32_t src_dyn_start =
+ src_binding_layout->dynamic_buffer_index + copy->srcArrayElement;
+ typed_memcpy(&dst->dynamic_buffers[dst_dyn_start],
+ &src->dynamic_buffers[src_dyn_start],
+ copy->descriptorCount);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
+
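+/* Push descriptors reuse the regular descriptor write helpers by wrapping the
+ * push set's inline storage in a temporary kk_descriptor_set.
+ */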
+void
+kk_push_descriptor_set_update(struct kk_push_descriptor_set *push_set,
+ uint32_t write_count,
+ const VkWriteDescriptorSet *writes)
+{
+ struct kk_descriptor_set_layout *layout = push_set->layout;
+ assert(layout->non_variable_descriptor_buffer_size < sizeof(push_set->data));
+ struct kk_descriptor_set set = {
+ .layout = push_set->layout,
+ .size = sizeof(push_set->data),
+ .mapped_ptr = push_set->data,
+ };
+
+ for (uint32_t w = 0; w < write_count; w++) {
+ const VkWriteDescriptorSet *write = &writes[w];
+ assert(write->dstSet == VK_NULL_HANDLE);
+
+ switch (write->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ for (uint32_t j = 0; j < write->descriptorCount; j++) {
+ write_sampled_image_view_desc(
+ &set, write->pImageInfo + j, write->dstBinding,
+ write->dstArrayElement + j, write->descriptorType);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ for (uint32_t j = 0; j < write->descriptorCount; j++) {
+ write_storage_image_view_desc(&set, write->pImageInfo + j,
+ write->dstBinding,
+ write->dstArrayElement + j);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ for (uint32_t j = 0; j < write->descriptorCount; j++) {
+ write_buffer_view_desc(&set, write->pTexelBufferView[j],
+ write->dstBinding,
+ write->dstArrayElement + j);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ for (uint32_t j = 0; j < write->descriptorCount; j++) {
+ write_buffer_desc(&set, write->pBufferInfo + j, write->dstBinding,
+ write->dstArrayElement + j);
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+static void kk_descriptor_pool_free(struct kk_descriptor_pool *pool,
+ uint64_t addr, uint64_t size);
+
+static void
+kk_descriptor_set_destroy(struct kk_device *dev,
+ struct kk_descriptor_pool *pool,
+ struct kk_descriptor_set *set)
+{
+ list_del(&set->link);
+ if (set->size > 0)
+ kk_descriptor_pool_free(pool, set->addr, set->size);
+ vk_descriptor_set_layout_unref(&dev->vk, &set->layout->vk);
+
+ vk_object_free(&dev->vk, NULL, set);
+}
+
+static void
+kk_destroy_descriptor_pool(struct kk_device *dev,
+ const VkAllocationCallbacks *pAllocator,
+ struct kk_descriptor_pool *pool)
+{
+ list_for_each_entry_safe(struct kk_descriptor_set, set, &pool->sets, link)
+ kk_descriptor_set_destroy(dev, pool, set);
+
+ util_vma_heap_finish(&pool->heap);
+
+ if (pool->bo != NULL)
+ kk_destroy_bo(dev, pool->bo);
+
+ vk_object_free(&dev->vk, pAllocator, pool);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_CreateDescriptorPool(VkDevice _device,
+ const VkDescriptorPoolCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDescriptorPool *pDescriptorPool)
+{
+ VK_FROM_HANDLE(kk_device, dev, _device);
+ struct kk_descriptor_pool *pool;
+ VkResult result = VK_SUCCESS;
+
+ pool = vk_object_zalloc(&dev->vk, pAllocator, sizeof(*pool),
+ VK_OBJECT_TYPE_DESCRIPTOR_POOL);
+ if (!pool)
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ list_inithead(&pool->sets);
+
+ const VkMutableDescriptorTypeCreateInfoEXT *mutable_info =
+ vk_find_struct_const(pCreateInfo->pNext,
+ MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT);
+
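+ /* First pass: find the largest alignment required by any requested
+ * descriptor type.
+ */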
+ uint32_t max_align = 0;
+ for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
+ const VkMutableDescriptorTypeListEXT *type_list = NULL;
+ if (pCreateInfo->pPoolSizes[i].type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT &&
+ mutable_info && i < mutable_info->mutableDescriptorTypeListCount)
+ type_list = &mutable_info->pMutableDescriptorTypeLists[i];
+
+ uint32_t stride, alignment;
+ kk_descriptor_stride_align_for_type(pCreateInfo->pPoolSizes[i].type,
+ type_list, &stride, &alignment);
+ max_align = MAX2(max_align, alignment);
+ }
+
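+ /* Second pass: accumulate the worst-case buffer size, padding every
+ * descriptor stride up to that maximum alignment.
+ */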
+ uint64_t mem_size = 0;
+ for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
+ const VkMutableDescriptorTypeListEXT *type_list = NULL;
+ if (pCreateInfo->pPoolSizes[i].type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT &&
+ mutable_info && i < mutable_info->mutableDescriptorTypeListCount)
+ type_list = &mutable_info->pMutableDescriptorTypeLists[i];
+
+ uint32_t stride, alignment;
+ kk_descriptor_stride_align_for_type(pCreateInfo->pPoolSizes[i].type,
+ type_list, &stride, &alignment);
+ mem_size +=
+ MAX2(stride, max_align) * pCreateInfo->pPoolSizes[i].descriptorCount;
+ }
+
+ /* Individual descriptor sets are aligned to the minimum constant-buffer
+ * alignment to ensure that we don't end up with unaligned data access in
+ * any shaders. This means each descriptor buffer allocated may burn up to
+ * kk_min_cbuf_alignment() - 1 bytes of extra space to get the right
+ * alignment. Allocate enough extra space that we can chop it into maxSets
+ * pieces, each aligned to kk_min_cbuf_alignment().
+ */
+ mem_size += kk_min_cbuf_alignment() * pCreateInfo->maxSets;
+
+ if (mem_size) {
+ result = kk_alloc_bo(dev, &dev->vk.base, mem_size, 0u, &pool->bo);
+ if (result != VK_SUCCESS) {
+ kk_destroy_descriptor_pool(dev, pAllocator, pool);
+ return result;
+ }
+
+ /* The BO may be larger thanks to GPU page alignment. We may as well
+ * make that extra space available to the client.
+ */
+ assert(pool->bo->size_B >= mem_size);
+ util_vma_heap_init(&pool->heap, pool->bo->gpu, pool->bo->size_B);
+ } else {
+ util_vma_heap_init(&pool->heap, 0, 0);
+ }
+
+ *pDescriptorPool = kk_descriptor_pool_to_handle(pool);
+ return result;
+}
+
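+/* Sub-allocates from the pool's VMA heap. The address handed out is a GPU
+ * address inside the pool BO; the CPU mapping is derived from the same
+ * offset.
+ */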
+static VkResult
+kk_descriptor_pool_alloc(struct kk_descriptor_pool *pool, uint64_t size,
+ uint64_t alignment, uint64_t *addr_out, void **map_out)
+{
+ assert(size > 0);
+ assert(size % alignment == 0);
+
+ if (size > pool->heap.free_size)
+ return VK_ERROR_OUT_OF_POOL_MEMORY;
+
+ uint64_t addr = util_vma_heap_alloc(&pool->heap, size, alignment);
+ if (addr == 0)
+ return VK_ERROR_FRAGMENTED_POOL;
+
+ assert(addr >= pool->bo->gpu);
+ assert(addr + size <= pool->bo->gpu + pool->bo->size_B);
+ uint64_t offset = addr - pool->bo->gpu;
+
+ *addr_out = addr;
+ *map_out = pool->bo->cpu + offset;
+
+ return VK_SUCCESS;
+}
+
+static void
+kk_descriptor_pool_free(struct kk_descriptor_pool *pool, uint64_t addr,
+ uint64_t size)
+{
+ assert(size > 0);
+ assert(addr >= pool->bo->gpu);
+ assert(addr + size <= pool->bo->gpu + pool->bo->size_B);
+ util_vma_heap_free(&pool->heap, addr, size);
+}
+
+static VkResult
+kk_descriptor_set_create(struct kk_device *dev, struct kk_descriptor_pool *pool,
+ struct kk_descriptor_set_layout *layout,
+ uint32_t variable_count,
+ struct kk_descriptor_set **out_set)
+{
+ struct kk_descriptor_set *set;
+ VkResult result = VK_SUCCESS;
+
+ uint32_t mem_size =
+ sizeof(struct kk_descriptor_set) +
+ layout->dynamic_buffer_count * sizeof(struct kk_buffer_address);
+ set =
+ vk_object_zalloc(&dev->vk, NULL, mem_size, VK_OBJECT_TYPE_DESCRIPTOR_SET);
+ if (!set)
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ set->size = layout->non_variable_descriptor_buffer_size;
+
+ if (layout->binding_count > 0 &&
+ (layout->binding[layout->binding_count - 1].flags &
+ VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) {
+ uint32_t stride = layout->binding[layout->binding_count - 1].stride;
+ set->size += stride * variable_count;
+ }
+
+ uint32_t alignment = kk_min_cbuf_alignment();
+ set->size = align64(set->size, alignment);
+
+ if (set->size > 0) {
+ result = kk_descriptor_pool_alloc(pool, set->size, alignment, &set->addr,
+ &set->mapped_ptr);
+ if (result != VK_SUCCESS) {
+ vk_object_free(&dev->vk, NULL, set);
+ return result;
+ }
+ set->mtl_descriptor_buffer = pool->bo->map;
+ }
+
+ vk_descriptor_set_layout_ref(&layout->vk);
+ set->layout = layout;
+
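+ /* Bindings with immutable samplers are pre-populated so the application
+ * never has to write them explicitly.
+ */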
+ for (uint32_t b = 0; b < layout->binding_count; b++) {
+ if (layout->binding[b].type != VK_DESCRIPTOR_TYPE_SAMPLER &&
+ layout->binding[b].type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+ continue;
+
+ if (layout->binding[b].immutable_samplers == NULL)
+ continue;
+
+ uint32_t array_size = layout->binding[b].array_size;
+ if (layout->binding[b].flags &
+ VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)
+ array_size = variable_count;
+
+ const VkDescriptorImageInfo empty = {};
+ for (uint32_t j = 0; j < array_size; j++) {
+ write_sampled_image_view_desc(set, &empty, b, j,
+ layout->binding[b].type);
+ }
+ }
+
+ list_addtail(&set->link, &pool->sets);
+ *out_set = set;
+
+ return result;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_AllocateDescriptorSets(VkDevice device,
+ const VkDescriptorSetAllocateInfo *pAllocateInfo,
+ VkDescriptorSet *pDescriptorSets)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VK_FROM_HANDLE(kk_descriptor_pool, pool, pAllocateInfo->descriptorPool);
+
+ VkResult result = VK_SUCCESS;
+ uint32_t i;
+
+ struct kk_descriptor_set *set = NULL;
+
+ const VkDescriptorSetVariableDescriptorCountAllocateInfo *var_desc_count =
+ vk_find_struct_const(
+ pAllocateInfo->pNext,
+ DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO);
+
+ /* Allocate one descriptor set per requested layout */
+ for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
+ VK_FROM_HANDLE(kk_descriptor_set_layout, layout,
+ pAllocateInfo->pSetLayouts[i]);
+ /* If descriptorSetCount is zero or this structure is not included in
+ * the pNext chain, then the variable lengths are considered to be zero.
+ */
+ const uint32_t variable_count =
+ var_desc_count && var_desc_count->descriptorSetCount > 0
+ ? var_desc_count->pDescriptorCounts[i]
+ : 0;
+
+ result =
+ kk_descriptor_set_create(dev, pool, layout, variable_count, &set);
+ if (result != VK_SUCCESS)
+ break;
+
+ pDescriptorSets[i] = kk_descriptor_set_to_handle(set);
+ }
+
+ if (result != VK_SUCCESS) {
+ kk_FreeDescriptorSets(device, pAllocateInfo->descriptorPool, i,
+ pDescriptorSets);
+ for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
+ pDescriptorSets[i] = VK_NULL_HANDLE;
+ }
+ }
+ return result;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_FreeDescriptorSets(VkDevice device, VkDescriptorPool descriptorPool,
+ uint32_t descriptorSetCount,
+ const VkDescriptorSet *pDescriptorSets)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VK_FROM_HANDLE(kk_descriptor_pool, pool, descriptorPool);
+
+ for (uint32_t i = 0; i < descriptorSetCount; i++) {
+ VK_FROM_HANDLE(kk_descriptor_set, set, pDescriptorSets[i]);
+
+ if (set)
+ kk_descriptor_set_destroy(dev, pool, set);
+ }
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_DestroyDescriptorPool(VkDevice device, VkDescriptorPool _pool,
+ const VkAllocationCallbacks *pAllocator)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VK_FROM_HANDLE(kk_descriptor_pool, pool, _pool);
+
+ if (!_pool)
+ return;
+
+ kk_destroy_descriptor_pool(dev, pAllocator, pool);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_ResetDescriptorPool(VkDevice device, VkDescriptorPool descriptorPool,
+ VkDescriptorPoolResetFlags flags)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VK_FROM_HANDLE(kk_descriptor_pool, pool, descriptorPool);
+
+ list_for_each_entry_safe(struct kk_descriptor_set, set, &pool->sets, link)
+ kk_descriptor_set_destroy(dev, pool, set);
+
+ return VK_SUCCESS;
+}
+
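+/* Shared template walker used by both vkUpdateDescriptorSetWithTemplate and
+ * push-descriptor template updates.
+ */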
+static void
+kk_descriptor_set_write_template(
+ struct kk_descriptor_set *set,
+ const struct vk_descriptor_update_template *template, const void *data)
+{
+ for (uint32_t i = 0; i < template->entry_count; i++) {
+ const struct vk_descriptor_template_entry *entry = &template->entries[i];
+
+ switch (entry->type) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ for (uint32_t j = 0; j < entry->array_count; j++) {
+ const VkDescriptorImageInfo *info =
+ data + entry->offset + j * entry->stride;
+
+ write_sampled_image_view_desc(set, info, entry->binding,
+ entry->array_element + j,
+ entry->type);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ for (uint32_t j = 0; j < entry->array_count; j++) {
+ const VkDescriptorImageInfo *info =
+ data + entry->offset + j * entry->stride;
+
+ write_storage_image_view_desc(set, info, entry->binding,
+ entry->array_element + j);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ for (uint32_t j = 0; j < entry->array_count; j++) {
+ const VkBufferView *bview =
+ data + entry->offset + j * entry->stride;
+
+ write_buffer_view_desc(set, *bview, entry->binding,
+ entry->array_element + j);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ for (uint32_t j = 0; j < entry->array_count; j++) {
+ const VkDescriptorBufferInfo *info =
+ data + entry->offset + j * entry->stride;
+
+ write_buffer_desc(set, info, entry->binding,
+ entry->array_element + j);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ for (uint32_t j = 0; j < entry->array_count; j++) {
+ const VkDescriptorBufferInfo *info =
+ data + entry->offset + j * entry->stride;
+
+ write_dynamic_buffer_desc(set, info, entry->binding,
+ entry->array_element + j);
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
+ write_desc(set, entry->binding, entry->array_element,
+ data + entry->offset, entry->array_count);
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_UpdateDescriptorSetWithTemplate(
+ VkDevice device, VkDescriptorSet descriptorSet,
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData)
+{
+ VK_FROM_HANDLE(kk_descriptor_set, set, descriptorSet);
+ VK_FROM_HANDLE(vk_descriptor_update_template, template,
+ descriptorUpdateTemplate);
+
+ kk_descriptor_set_write_template(set, template, pData);
+}
+
+void
+kk_push_descriptor_set_update_template(
+ struct kk_push_descriptor_set *push_set,
+ struct kk_descriptor_set_layout *layout,
+ const struct vk_descriptor_update_template *template, const void *data)
+{
+ struct kk_descriptor_set tmp_set = {
+ .layout = layout,
+ .size = sizeof(push_set->data),
+ .mapped_ptr = push_set->data,
+ };
+ kk_descriptor_set_write_template(&tmp_set, template, data);
+}
diff --git a/src/kosmickrisp/vulkan/kk_descriptor_set.h b/src/kosmickrisp/vulkan/kk_descriptor_set.h
new file mode 100644
index 00000000000..d1652448a41
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_descriptor_set.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_DESCRIPTOR_SET
+#define KK_DESCRIPTOR_SET 1
+
+#include "kk_private.h"
+
+#include "kk_descriptor_types.h"
+#include "kk_device.h"
+
+#include "vk_descriptor_update_template.h"
+#include "vk_object.h"
+
+#include "util/list.h"
+#include "util/vma.h"
+
+struct kk_descriptor_set_layout;
+struct kk_bo;
+
+struct kk_descriptor_pool {
+ struct vk_object_base base;
+
+ struct list_head sets;
+
+ struct kk_bo *bo;
+ struct util_vma_heap heap;
+};
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(kk_descriptor_pool, base, VkDescriptorPool,
+ VK_OBJECT_TYPE_DESCRIPTOR_POOL)
+
+struct kk_descriptor_set {
+ struct vk_object_base base;
+
+ /* Link in kk_descriptor_pool::sets */
+ struct list_head link;
+
+ struct kk_descriptor_set_layout *layout;
+ mtl_resource *mtl_descriptor_buffer;
+ void *mapped_ptr;
+ uint64_t addr;
+ uint32_t size;
+
+ struct kk_buffer_address dynamic_buffers[];
+};
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(kk_descriptor_set, base, VkDescriptorSet,
+ VK_OBJECT_TYPE_DESCRIPTOR_SET)
+
+static inline struct kk_buffer_address
+kk_descriptor_set_addr(const struct kk_descriptor_set *set)
+{
+ return (struct kk_buffer_address){
+ .base_addr = set->addr,
+ .size = set->size,
+ };
+}
+
+struct kk_push_descriptor_set {
+ uint8_t data[KK_PUSH_DESCRIPTOR_SET_SIZE];
+ struct kk_descriptor_set_layout *layout;
+ mtl_resource *mtl_descriptor_buffer;
+ uint32_t resource_count;
+ mtl_resource *mtl_resources[];
+};
+
+void kk_push_descriptor_set_update(struct kk_push_descriptor_set *push_set,
+ uint32_t write_count,
+ const VkWriteDescriptorSet *writes);
+
+void kk_push_descriptor_set_update_template(
+ struct kk_push_descriptor_set *push_set,
+ struct kk_descriptor_set_layout *layout,
+ const struct vk_descriptor_update_template *template, const void *data);
+
+#endif
diff --git a/src/kosmickrisp/vulkan/kk_descriptor_set_layout.c b/src/kosmickrisp/vulkan/kk_descriptor_set_layout.c
new file mode 100644
index 00000000000..11b4c98b9f6
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_descriptor_set_layout.c
@@ -0,0 +1,496 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_descriptor_set_layout.h"
+
+#include "kk_descriptor_types.h"
+#include "kk_device.h"
+#include "kk_entrypoints.h"
+#include "kk_physical_device.h"
+#include "kk_sampler.h"
+
+#include "vk_pipeline_layout.h"
+
+static bool
+binding_has_immutable_samplers(const VkDescriptorSetLayoutBinding *binding)
+{
+ switch (binding->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ return binding->pImmutableSamplers != NULL;
+
+ default:
+ return false;
+ }
+}
+
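+/* Returns the per-element size and alignment a descriptor of the given type
+ * occupies in the descriptor buffer. For mutable descriptors this is the
+ * maximum over the permitted type list.
+ */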
+void
+kk_descriptor_stride_align_for_type(
+ VkDescriptorType type, const VkMutableDescriptorTypeListEXT *type_list,
+ uint32_t *stride, uint32_t *alignment)
+{
+ switch (type) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ /* TODO: How do samplers work? */
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ *stride = *alignment = sizeof(struct kk_sampled_image_descriptor);
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ *stride = *alignment = sizeof(struct kk_storage_image_descriptor);
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ *stride = *alignment = sizeof(struct kk_buffer_address);
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ *stride = *alignment = 0; /* These don't take up buffer space */
+ break;
+
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
+ *stride = 1; /* Array size is bytes */
+ *alignment = kk_min_cbuf_alignment();
+ break;
+
+ case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
+ *stride = *alignment = 0;
+ if (type_list == NULL)
+ *stride = *alignment = KK_MAX_DESCRIPTOR_SIZE;
+ for (unsigned i = 0; type_list && i < type_list->descriptorTypeCount;
+ i++) {
+ /* This shouldn't recurse */
+ assert(type_list->pDescriptorTypes[i] !=
+ VK_DESCRIPTOR_TYPE_MUTABLE_EXT);
+ uint32_t desc_stride, desc_align;
+ kk_descriptor_stride_align_for_type(type_list->pDescriptorTypes[i],
+ NULL, &desc_stride, &desc_align);
+ *stride = MAX2(*stride, desc_stride);
+ *alignment = MAX2(*alignment, desc_align);
+ }
+ *stride = ALIGN(*stride, *alignment);
+ break;
+
+ default:
+ UNREACHABLE("Invalid descriptor type");
+ }
+
+ assert(*stride <= KK_MAX_DESCRIPTOR_SIZE);
+}
+
+static const VkMutableDescriptorTypeListEXT *
+kk_descriptor_get_type_list(VkDescriptorType type,
+ const VkMutableDescriptorTypeCreateInfoEXT *info,
+ const uint32_t info_idx)
+{
+ const VkMutableDescriptorTypeListEXT *type_list = NULL;
+ if (type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) {
+ assert(info != NULL);
+ assert(info_idx < info->mutableDescriptorTypeListCount);
+ type_list = &info->pMutableDescriptorTypeLists[info_idx];
+ }
+ return type_list;
+}
+
+static void
+kk_descriptor_set_layout_destroy(struct vk_device *vk_dev,
+ struct vk_descriptor_set_layout *vk_layout)
+{
+ struct kk_device *dev = container_of(vk_dev, struct kk_device, vk);
+ struct kk_descriptor_set_layout *layout =
+ vk_to_kk_descriptor_set_layout(vk_layout);
+
+ vk_object_free(&dev->vk, NULL, layout);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_CreateDescriptorSetLayout(VkDevice device,
+ const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDescriptorSetLayout *pSetLayout)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+
+ uint32_t num_bindings = 0;
+ uint32_t immutable_sampler_count = 0;
+ for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
+ const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j];
+ num_bindings = MAX2(num_bindings, binding->binding + 1);
+
+ /* From the Vulkan 1.1.97 spec for VkDescriptorSetLayoutBinding:
+ *
+ * "If descriptorType specifies a VK_DESCRIPTOR_TYPE_SAMPLER or
+ * VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER type descriptor, then
+ * pImmutableSamplers can be used to initialize a set of immutable
+ * samplers. [...] If descriptorType is not one of these descriptor
+ * types, then pImmutableSamplers is ignored.
+ *
+ * We need to be careful here and only parse pImmutableSamplers if we
+ * have one of the right descriptor types.
+ */
+ if (binding_has_immutable_samplers(binding))
+ immutable_sampler_count += binding->descriptorCount;
+ }
+
+ VK_MULTIALLOC(ma);
+ VK_MULTIALLOC_DECL(&ma, struct kk_descriptor_set_layout, layout, 1);
+ VK_MULTIALLOC_DECL(&ma, struct kk_descriptor_set_binding_layout, bindings,
+ num_bindings);
+ VK_MULTIALLOC_DECL(&ma, struct kk_sampler *, samplers,
+ immutable_sampler_count);
+
+ if (!vk_descriptor_set_layout_multizalloc(&dev->vk, &ma, pCreateInfo))
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ layout->vk.destroy = kk_descriptor_set_layout_destroy;
+ layout->flags = pCreateInfo->flags;
+ layout->binding_count = num_bindings;
+
+ for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
+ const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j];
+ uint32_t b = binding->binding;
+ /* We temporarily store pCreateInfo->pBindings[] index (plus one) in the
+ * immutable_samplers pointer. This provides us with a quick-and-dirty
+ * way to sort the bindings by binding number.
+ */
+ layout->binding[b].immutable_samplers = (void *)(uintptr_t)(j + 1);
+ }
+
+ const VkDescriptorSetLayoutBindingFlagsCreateInfo *binding_flags_info =
+ vk_find_struct_const(pCreateInfo->pNext,
+ DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO);
+ const VkMutableDescriptorTypeCreateInfoEXT *mutable_info =
+ vk_find_struct_const(pCreateInfo->pNext,
+ MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT);
+
+ uint32_t buffer_size = 0;
+ uint32_t max_variable_descriptor_size = 0;
+ uint8_t dynamic_buffer_count = 0;
+ uint32_t total_descriptor_count = 0u;
+ for (uint32_t b = 0; b < num_bindings; b++) {
+ /* We stashed the pCreateInfo->pBindings[] index (plus one) in the
+ * immutable_samplers pointer. Check for NULL (empty binding) and then
+ * reset it and compute the index.
+ */
+ if (layout->binding[b].immutable_samplers == NULL)
+ continue;
+ const uint32_t info_idx =
+ (uintptr_t)(void *)layout->binding[b].immutable_samplers - 1;
+ layout->binding[b].immutable_samplers = NULL;
+
+ const VkDescriptorSetLayoutBinding *binding =
+ &pCreateInfo->pBindings[info_idx];
+
+ if (binding->descriptorCount == 0)
+ continue;
+
+ layout->binding[b].type = binding->descriptorType;
+ layout->binding[b].mtl_resources_index = total_descriptor_count;
+ layout->descriptor_count += binding->descriptorCount;
+
+ if (binding_flags_info && binding_flags_info->bindingCount > 0) {
+ assert(binding_flags_info->bindingCount == pCreateInfo->bindingCount);
+ layout->binding[b].flags = binding_flags_info->pBindingFlags[info_idx];
+ }
+
+ layout->binding[b].array_size = binding->descriptorCount;
+
+ switch (binding->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ layout->binding[b].dynamic_buffer_index = dynamic_buffer_count;
+ BITSET_SET_RANGE(layout->dynamic_ubos, dynamic_buffer_count,
+ dynamic_buffer_count + binding->descriptorCount - 1);
+ dynamic_buffer_count += binding->descriptorCount;
+ break;
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ layout->binding[b].dynamic_buffer_index = dynamic_buffer_count;
+ dynamic_buffer_count += binding->descriptorCount;
+ break;
+
+ default:
+ break;
+ }
+
+ const VkMutableDescriptorTypeListEXT *type_list =
+ kk_descriptor_get_type_list(binding->descriptorType, mutable_info,
+ info_idx);
+
+ uint32_t stride, alignment;
+ kk_descriptor_stride_align_for_type(binding->descriptorType, type_list,
+ &stride, &alignment);
+
+ uint8_t max_plane_count = 1;
+
+ if (binding_has_immutable_samplers(binding)) {
+ layout->binding[b].immutable_samplers = samplers;
+ samplers += binding->descriptorCount;
+ for (uint32_t i = 0; i < binding->descriptorCount; i++) {
+ VK_FROM_HANDLE(kk_sampler, sampler, binding->pImmutableSamplers[i]);
+ layout->binding[b].immutable_samplers[i] = sampler;
+ const uint8_t sampler_plane_count =
+ sampler->vk.ycbcr_conversion
+ ? vk_format_get_plane_count(
+ sampler->vk.ycbcr_conversion->state.format)
+ : 1;
+ if (max_plane_count < sampler_plane_count)
+ max_plane_count = sampler_plane_count;
+ }
+ }
+
+ stride *= max_plane_count;
+ layout->binding[b].count_per_element = max_plane_count;
+ total_descriptor_count += max_plane_count * binding->descriptorCount;
+
+ if (stride > 0) {
+ assert(stride <= UINT8_MAX);
+ assert(util_is_power_of_two_nonzero(alignment));
+
+ buffer_size = align64(buffer_size, alignment);
+ layout->binding[b].offset = buffer_size;
+ layout->binding[b].stride = stride;
+
+ if (layout->binding[b].flags &
+ VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT) {
+ /* From the Vulkan 1.3.256 spec:
+ *
+ * VUID-VkDescriptorSetLayoutBindingFlagsCreateInfo-pBindingFlags-03004
+ * "If an element of pBindingFlags includes
+ * VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT, then
+ * all other elements of
+ * VkDescriptorSetLayoutCreateInfo::pBindings must have a
+ * smaller value of binding"
+ *
+ * In other words, it has to be the last binding.
+ */
+ assert(b == num_bindings - 1);
+ assert(max_variable_descriptor_size == 0);
+ max_variable_descriptor_size = stride * binding->descriptorCount;
+ } else {
+ /* the allocation size will be computed at descriptor allocation,
+ * but the buffer size will be already aligned as this binding will
+ * be the last
+ */
+ buffer_size += stride * binding->descriptorCount;
+ }
+ }
+ }
+
+ layout->non_variable_descriptor_buffer_size = buffer_size;
+ layout->max_buffer_size = buffer_size + max_variable_descriptor_size;
+ layout->dynamic_buffer_count = dynamic_buffer_count;
+
+ struct mesa_blake3 blake3_ctx;
+ _mesa_blake3_init(&blake3_ctx);
+
+#define BLAKE3_UPDATE_VALUE(x) \
+ _mesa_blake3_update(&blake3_ctx, &(x), sizeof(x));
+ BLAKE3_UPDATE_VALUE(layout->non_variable_descriptor_buffer_size);
+ BLAKE3_UPDATE_VALUE(layout->dynamic_buffer_count);
+ BLAKE3_UPDATE_VALUE(layout->binding_count);
+
+ for (uint32_t b = 0; b < num_bindings; b++) {
+ BLAKE3_UPDATE_VALUE(layout->binding[b].type);
+ BLAKE3_UPDATE_VALUE(layout->binding[b].flags);
+ BLAKE3_UPDATE_VALUE(layout->binding[b].array_size);
+ BLAKE3_UPDATE_VALUE(layout->binding[b].offset);
+ BLAKE3_UPDATE_VALUE(layout->binding[b].stride);
+ BLAKE3_UPDATE_VALUE(layout->binding[b].dynamic_buffer_index);
+
+ if (layout->binding[b].immutable_samplers != NULL) {
+ for (uint32_t i = 0; i < layout->binding[b].array_size; i++) {
+ const struct kk_sampler *sampler =
+ layout->binding[b].immutable_samplers[i];
+
+ /* We zalloc the object, so it's safe to hash the whole thing */
+ if (sampler != NULL && sampler->vk.ycbcr_conversion != NULL)
+ BLAKE3_UPDATE_VALUE(sampler->vk.ycbcr_conversion->state);
+ }
+ }
+ }
+#undef BLAKE3_UPDATE_VALUE
+
+ _mesa_blake3_final(&blake3_ctx, layout->vk.blake3);
+
+ if (pCreateInfo->flags &
+ VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT) {
+ void *sampler_desc_data =
+ vk_alloc2(&dev->vk.alloc, pAllocator, buffer_size, 4,
+ VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+ if (sampler_desc_data == NULL) {
+ kk_descriptor_set_layout_destroy(&dev->vk, &layout->vk);
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ for (uint32_t b = 0; b < num_bindings; b++) {
+ assert(layout->binding[b].type == VK_DESCRIPTOR_TYPE_SAMPLER);
+ assert(layout->binding[b].array_size == 1);
+ assert(layout->binding[b].immutable_samplers != NULL);
+ assert(!(layout->binding[b].flags &
+ VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT));
+
+ /* I'm paranoid */
+ if (layout->binding[b].immutable_samplers == NULL)
+ continue;
+
+ struct kk_sampler *sampler = layout->binding[b].immutable_samplers[0];
+
+ /* YCbCr has to come in through a combined image/sampler */
+ assert(sampler->plane_count == 1);
+
+ assert(sampler->planes[0].hw->handle);
+ }
+
+ vk_free2(&dev->vk.alloc, pAllocator, sampler_desc_data);
+ }
+
+ *pSetLayout = kk_descriptor_set_layout_to_handle(layout);
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetDescriptorSetLayoutSupport(
+ VkDevice device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
+ VkDescriptorSetLayoutSupport *pSupport)
+{
+ const VkMutableDescriptorTypeCreateInfoEXT *mutable_info =
+ vk_find_struct_const(pCreateInfo->pNext,
+ MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT);
+ const VkDescriptorSetLayoutBindingFlagsCreateInfo *binding_flags =
+ vk_find_struct_const(pCreateInfo->pNext,
+ DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO);
+
+ /* Figure out the maximum alignment up-front. Otherwise, we need to sort
+ * the list of descriptors by binding number in order to get the size
+ * accumulation right.
+ */
+ uint32_t max_align = 0;
+ for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
+ const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[i];
+ const VkMutableDescriptorTypeListEXT *type_list =
+ kk_descriptor_get_type_list(binding->descriptorType, mutable_info, i);
+
+ uint32_t stride, alignment;
+ kk_descriptor_stride_align_for_type(binding->descriptorType, type_list,
+ &stride, &alignment);
+ max_align = MAX2(max_align, alignment);
+ }
+
+ uint64_t non_variable_size = 0;
+ uint32_t variable_stride = 0;
+ uint32_t variable_count = 0;
+ uint8_t dynamic_buffer_count = 0;
+
+ for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
+ const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[i];
+
+ VkDescriptorBindingFlags flags = 0;
+ if (binding_flags != NULL && binding_flags->bindingCount > 0)
+ flags = binding_flags->pBindingFlags[i];
+
+ switch (binding->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ dynamic_buffer_count += binding->descriptorCount;
+ break;
+ default:
+ break;
+ }
+
+ const VkMutableDescriptorTypeListEXT *type_list =
+ kk_descriptor_get_type_list(binding->descriptorType, mutable_info, i);
+
+ uint32_t stride, alignment;
+ kk_descriptor_stride_align_for_type(binding->descriptorType, type_list,
+ &stride, &alignment);
+
+ if (stride > 0) {
+ assert(stride <= UINT8_MAX);
+ assert(util_is_power_of_two_nonzero(alignment));
+
+ if (flags & VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT) {
+ /* From the Vulkan 1.3.256 spec:
+ *
+ * "For the purposes of this command, a variable-sized
+ * descriptor binding with a descriptorCount of zero is treated
+ * as if the descriptorCount is one"
+ */
+ variable_count = MAX2(1, binding->descriptorCount);
+ variable_stride = stride;
+ } else {
+ /* Since we're aligning to the maximum and since this is just a
+ * check for whether or not the max buffer size is big enough, we
+ * keep non_variable_size aligned to max_align.
+ */
+ non_variable_size += stride * binding->descriptorCount;
+ non_variable_size = align64(non_variable_size, max_align);
+ }
+ }
+ }
+
+ uint64_t buffer_size = non_variable_size;
+ if (variable_stride > 0) {
+ buffer_size += variable_stride * variable_count;
+ buffer_size = align64(buffer_size, max_align);
+ }
+
+ uint32_t max_buffer_size;
+ if (pCreateInfo->flags &
+ VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)
+ max_buffer_size = KK_PUSH_DESCRIPTOR_SET_SIZE;
+ else
+ max_buffer_size = KK_MAX_DESCRIPTOR_SET_SIZE;
+
+ pSupport->supported = dynamic_buffer_count <= KK_MAX_DYNAMIC_BUFFERS &&
+ buffer_size <= max_buffer_size;
+
+ vk_foreach_struct(ext, pSupport->pNext) {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT: {
+ VkDescriptorSetVariableDescriptorCountLayoutSupport *vs = (void *)ext;
+ if (variable_stride > 0) {
+ vs->maxVariableDescriptorCount =
+ (max_buffer_size - non_variable_size) / variable_stride;
+ } else {
+ vs->maxVariableDescriptorCount = 0;
+ }
+ break;
+ }
+
+ default:
+ vk_debug_ignored_stype(ext->sType);
+ break;
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetDescriptorSetLayoutSizeEXT(VkDevice device, VkDescriptorSetLayout _layout,
+ VkDeviceSize *pLayoutSizeInBytes)
+{
+ VK_FROM_HANDLE(kk_descriptor_set_layout, layout, _layout);
+
+ *pLayoutSizeInBytes = layout->max_buffer_size;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetDescriptorSetLayoutBindingOffsetEXT(VkDevice device,
+ VkDescriptorSetLayout _layout,
+ uint32_t binding,
+ VkDeviceSize *pOffset)
+{
+ VK_FROM_HANDLE(kk_descriptor_set_layout, layout, _layout);
+
+ *pOffset = layout->binding[binding].offset;
+}
diff --git a/src/kosmickrisp/vulkan/kk_descriptor_set_layout.h b/src/kosmickrisp/vulkan/kk_descriptor_set_layout.h
new file mode 100644
index 00000000000..14ecda0f62a
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_descriptor_set_layout.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_DESCRIPTOR_SET_LAYOUT
+#define KK_DESCRIPTOR_SET_LAYOUT 1
+
+#include "kk_private.h"
+
+#include "vk_descriptor_set_layout.h"
+#include "vk_object.h"
+
+#include "util/bitset.h"
+
+struct kk_device;
+struct kk_physical_device;
+struct kk_sampler;
+struct vk_pipeline_layout;
+
+struct kk_descriptor_set_binding_layout {
+ /* The type of the descriptors in this binding */
+ VkDescriptorType type;
+
+ /* Flags provided when this binding was created */
+ VkDescriptorBindingFlags flags;
+
+ /* Number of array elements in this binding (or size in bytes for inline
+ * uniform data)
+ */
+ uint32_t array_size;
+
+ /* Number of actual descriptors per element */
+ uint32_t count_per_element;
+
+ /* Offset into the descriptor buffer where this descriptor lives */
+ uint32_t offset;
+
+ /* Index into the mtl_resource_ids array where this binding's descriptors start */
+ uint32_t mtl_resources_index;
+
+ /* Stride between array elements in the descriptor buffer */
+ uint8_t stride;
+
+ /* Index into the dynamic buffer binding array */
+ uint8_t dynamic_buffer_index;
+
+ /* Immutable samplers (or NULL if no immutable samplers) */
+ struct kk_sampler **immutable_samplers;
+};
+
+struct kk_descriptor_set_layout {
+ struct vk_descriptor_set_layout vk;
+
+ VkDescriptorSetLayoutCreateFlagBits flags;
+
+ /* Size of the descriptor buffer for this descriptor set, not including
+ * the space needed for variable-count descriptors
+ */
+ uint32_t non_variable_descriptor_buffer_size;
+
+ /* Maximum possible buffer size for this descriptor set */
+ uint32_t max_buffer_size;
+
+ /* Number of dynamic UBO bindings in this set */
+ uint8_t dynamic_buffer_count;
+
+ /* Which dynamic buffers are UBOs */
+ BITSET_DECLARE(dynamic_ubos, KK_MAX_DYNAMIC_BUFFERS);
+
+ /* Number of bindings in this descriptor set */
+ uint32_t binding_count;
+
+ /* Number of descriptors in the layout */
+ uint32_t descriptor_count;
+
+ /* Address to the embedded sampler descriptor buffer.
+ *
+ * This is allocated from the device heap and has the size
+ * non_variable_descriptor_buffer_size.
+ */
+ uint64_t embedded_samplers_addr;
+
+ /* Bindings in this descriptor set */
+ struct kk_descriptor_set_binding_layout binding[];
+};
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(kk_descriptor_set_layout, vk.base,
+ VkDescriptorSetLayout,
+ VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
+
+void kk_descriptor_stride_align_for_type(
+ VkDescriptorType type, const VkMutableDescriptorTypeListEXT *type_list,
+ uint32_t *stride, uint32_t *alignment);
+
+static inline struct kk_descriptor_set_layout *
+vk_to_kk_descriptor_set_layout(struct vk_descriptor_set_layout *layout)
+{
+ return container_of(layout, struct kk_descriptor_set_layout, vk);
+}
+
+#endif /* KK_DESCRIPTOR_SET_LAYOUT */
diff --git a/src/kosmickrisp/vulkan/kk_descriptor_types.h b/src/kosmickrisp/vulkan/kk_descriptor_types.h
new file mode 100644
index 00000000000..5296366c8c6
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_descriptor_types.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2024 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+#ifndef KK_DESCRIPTOR_TYPES
+#define KK_DESCRIPTOR_TYPES 1
+
+#include "kk_private.h"
+
+/* TODO_KOSMICKRISP Reduce size to 32 bytes by moving border to a heap. */
+struct kk_sampled_image_descriptor {
+ uint64_t image_gpu_resource_id;
+ uint16_t sampler_index;
+ uint16_t lod_bias_fp16;
+ uint16_t lod_min_fp16;
+ uint16_t lod_max_fp16;
+ uint32_t has_border;
+ uint32_t pad_to_64_bits;
+ uint32_t border[4];
+ uint64_t pad_to_power_2[3];
+};
+
+static_assert(sizeof(struct kk_sampled_image_descriptor) == 64,
+ "kk_sampled_image_descriptor has no holes");
+
+struct kk_storage_image_descriptor {
+ uint64_t image_gpu_resource_id;
+};
+
+static_assert(sizeof(struct kk_storage_image_descriptor) == 8,
+ "kk_storage_image_descriptor has no holes");
+
+/* This has to match nir_address_format_64bit_bounded_global */
+struct kk_buffer_address {
+ uint64_t base_addr;
+ uint32_t size;
+ uint32_t zero; /* Must be zero! */
+};
+
+static_assert(sizeof(struct kk_buffer_address) == 16,
+ "kk_buffer_address has no holes");
+
+#endif /* KK_DESCRIPTOR_TYPES */
diff --git a/src/kosmickrisp/vulkan/kk_device.c b/src/kosmickrisp/vulkan/kk_device.c
new file mode 100644
index 00000000000..0581b7d4bda
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_device.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_device.h"
+
+#include "kk_cmd_buffer.h"
+#include "kk_entrypoints.h"
+#include "kk_instance.h"
+#include "kk_physical_device.h"
+#include "kk_shader.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+
+#include "vk_cmd_enqueue_entrypoints.h"
+#include "vk_common_entrypoints.h"
+
+#include "vulkan/wsi/wsi_common.h"
+#include "vk_pipeline_cache.h"
+
+#include <time.h>
+
+DERIVE_HASH_TABLE(mtl_sampler_packed);
+
+static VkResult
+kk_init_sampler_heap(struct kk_device *dev, struct kk_sampler_heap *h)
+{
+ h->ht = mtl_sampler_packed_table_create(NULL);
+ if (!h->ht)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ VkResult result = kk_query_table_init(dev, &h->table, 1024);
+
+ if (result != VK_SUCCESS) {
+ ralloc_free(h->ht);
+ return result;
+ }
+
+ simple_mtx_init(&h->lock, mtx_plain);
+ return VK_SUCCESS;
+}
+
+static void
+kk_destroy_sampler_heap(struct kk_device *dev, struct kk_sampler_heap *h)
+{
+ struct hash_entry *entry = _mesa_hash_table_next_entry(h->ht, NULL);
+ while (entry) {
+ struct kk_rc_sampler *sampler = (struct kk_rc_sampler *)entry->data;
+ mtl_release(sampler->handle);
+ entry = _mesa_hash_table_next_entry(h->ht, entry);
+ }
+ kk_query_table_finish(dev, &h->table);
+ ralloc_free(h->ht);
+ simple_mtx_destroy(&h->lock);
+}
+
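+/* Samplers are deduplicated on their packed Metal state: identical states
+ * share a single mtl_sampler and sampler-table slot, tracked by refcount.
+ */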
+static VkResult
+kk_sampler_heap_add_locked(struct kk_device *dev, struct kk_sampler_heap *h,
+ struct mtl_sampler_packed desc,
+ struct kk_rc_sampler **out)
+{
+ struct hash_entry *ent = _mesa_hash_table_search(h->ht, &desc);
+ if (ent != NULL) {
+ *out = ent->data;
+
+ assert((*out)->refcount != 0);
+ (*out)->refcount++;
+
+ return VK_SUCCESS;
+ }
+
+ struct kk_rc_sampler *rc = ralloc(h->ht, struct kk_rc_sampler);
+ if (!rc)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ mtl_sampler *handle = kk_sampler_create(dev, &desc);
+ uint64_t gpu_id = mtl_sampler_get_gpu_resource_id(handle);
+
+ uint32_t index;
+ VkResult result = kk_query_table_add(dev, &h->table, gpu_id, &index);
+ if (result != VK_SUCCESS) {
+ mtl_release(handle);
+ ralloc_free(rc);
+ return result;
+ }
+
+ *rc = (struct kk_rc_sampler){
+ .key = desc,
+ .handle = handle,
+ .refcount = 1,
+ .index = index,
+ };
+
+ _mesa_hash_table_insert(h->ht, &rc->key, rc);
+ *out = rc;
+
+ return VK_SUCCESS;
+}
+
+VkResult
+kk_sampler_heap_add(struct kk_device *dev, struct mtl_sampler_packed desc,
+ struct kk_rc_sampler **out)
+{
+ struct kk_sampler_heap *h = &dev->samplers;
+
+ simple_mtx_lock(&h->lock);
+ VkResult result = kk_sampler_heap_add_locked(dev, h, desc, out);
+ simple_mtx_unlock(&h->lock);
+
+ return result;
+}
+
+static void
+kk_sampler_heap_remove_locked(struct kk_device *dev, struct kk_sampler_heap *h,
+ struct kk_rc_sampler *rc)
+{
+ assert(rc->refcount != 0);
+ rc->refcount--;
+
+ if (rc->refcount == 0) {
+ mtl_release(rc->handle);
+ kk_query_table_remove(dev, &h->table, rc->index);
+ _mesa_hash_table_remove_key(h->ht, &rc->key);
+ ralloc_free(rc);
+ }
+}
+
+void
+kk_sampler_heap_remove(struct kk_device *dev, struct kk_rc_sampler *rc)
+{
+ struct kk_sampler_heap *h = &dev->samplers;
+
+ simple_mtx_lock(&h->lock);
+ kk_sampler_heap_remove_locked(dev, h, rc);
+ simple_mtx_unlock(&h->lock);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_CreateDevice(VkPhysicalDevice physicalDevice,
+ const VkDeviceCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
+{
+ VK_FROM_HANDLE(kk_physical_device, pdev, physicalDevice);
+ VkResult result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ struct kk_device *dev;
+
+ dev = vk_zalloc2(&pdev->vk.instance->alloc, pAllocator, sizeof(*dev), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (!dev)
+ return vk_error(pdev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ /* Fill the dispatch table we will expose to the users */
+ vk_device_dispatch_table_from_entrypoints(
+ &dev->exposed_dispatch_table, &vk_cmd_enqueue_device_entrypoints, true);
+ vk_device_dispatch_table_from_entrypoints(&dev->exposed_dispatch_table,
+ &kk_device_entrypoints, false);
+ vk_device_dispatch_table_from_entrypoints(&dev->exposed_dispatch_table,
+ &wsi_device_entrypoints, false);
+ vk_device_dispatch_table_from_entrypoints(
+ &dev->exposed_dispatch_table, &vk_common_device_entrypoints, false);
+
+ struct vk_device_dispatch_table dispatch_table;
+ vk_device_dispatch_table_from_entrypoints(&dispatch_table,
+ &kk_device_entrypoints, true);
+ vk_device_dispatch_table_from_entrypoints(
+ &dispatch_table, &vk_common_device_entrypoints, false);
+ vk_device_dispatch_table_from_entrypoints(&dispatch_table,
+ &wsi_device_entrypoints, false);
+
+ result = vk_device_init(&dev->vk, &pdev->vk, &dispatch_table, pCreateInfo,
+ pAllocator);
+ if (result != VK_SUCCESS)
+ goto fail_alloc;
+
+ dev->vk.shader_ops = &kk_device_shader_ops;
+ dev->mtl_handle = pdev->mtl_dev_handle;
+ dev->vk.command_buffer_ops = &kk_cmd_buffer_ops;
+ dev->vk.command_dispatch_table = &dev->vk.dispatch_table;
+
+ /* Buffer to use as null descriptor */
+ result = kk_alloc_bo(dev, &dev->vk.base, sizeof(uint64_t) * 8, 8u,
+ &dev->null_descriptor);
+ if (result != VK_SUCCESS)
+ goto fail_init;
+
+ result =
+ kk_queue_init(dev, &dev->queue, &pCreateInfo->pQueueCreateInfos[0], 0);
+ if (result != VK_SUCCESS)
+ goto fail_vab_memory;
+
+ result = kk_device_init_meta(dev);
+ if (result != VK_SUCCESS)
+ goto fail_mem_cache;
+
+ result = kk_query_table_init(dev, &dev->occlusion_queries,
+ KK_MAX_OCCLUSION_QUERIES);
+ if (result != VK_SUCCESS)
+ goto fail_meta;
+
+ result = kk_init_sampler_heap(dev, &dev->samplers);
+ if (result != VK_SUCCESS)
+ goto fail_query_table;
+
+ result = kk_device_init_lib(dev);
+ if (result != VK_SUCCESS)
+ goto fail_sampler_heap;
+
+ simple_mtx_init(&dev->user_heap_cache.mutex, mtx_plain);
+ util_dynarray_init(&dev->user_heap_cache.handles, NULL);
+
+ *pDevice = kk_device_to_handle(dev);
+
+   dev->gpu_capture_enabled = kk_get_environment_boolean(KK_ENABLE_GPU_CAPTURE);
+   if (dev->gpu_capture_enabled)
+      mtl_start_gpu_capture(dev->mtl_handle);
+
+ return VK_SUCCESS;
+
+fail_sampler_heap:
+ kk_destroy_sampler_heap(dev, &dev->samplers);
+fail_query_table:
+ kk_query_table_finish(dev, &dev->occlusion_queries);
+fail_meta:
+ kk_device_finish_meta(dev);
+fail_mem_cache:
+ kk_queue_finish(dev, &dev->queue);
+fail_vab_memory:
+ kk_destroy_bo(dev, dev->null_descriptor);
+fail_init:
+ vk_device_finish(&dev->vk);
+fail_alloc:
+ vk_free(&dev->vk.alloc, dev);
+ return result;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
+{
+ VK_FROM_HANDLE(kk_device, dev, _device);
+
+ if (!dev)
+ return;
+
+ /* Meta first since it may destroy Vulkan objects */
+ kk_device_finish_meta(dev);
+
+ util_dynarray_fini(&dev->user_heap_cache.handles);
+ simple_mtx_destroy(&dev->user_heap_cache.mutex);
+ kk_device_finish_lib(dev);
+ kk_query_table_finish(dev, &dev->occlusion_queries);
+ kk_destroy_sampler_heap(dev, &dev->samplers);
+
+ kk_queue_finish(dev, &dev->queue);
+ kk_destroy_bo(dev, dev->null_descriptor);
+ vk_device_finish(&dev->vk);
+
+ if (dev->gpu_capture_enabled) {
+ mtl_stop_gpu_capture();
+ }
+
+ vk_free(&dev->vk.alloc, dev);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_GetCalibratedTimestampsKHR(
+ VkDevice _device, uint32_t timestampCount,
+ const VkCalibratedTimestampInfoKHR *pTimestampInfos, uint64_t *pTimestamps,
+ uint64_t *pMaxDeviation)
+{
+ uint64_t max_clock_period = 0;
+ uint64_t begin, end;
+ int d;
+
+#ifdef CLOCK_MONOTONIC_RAW
+ begin = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
+#else
+ begin = vk_clock_gettime(CLOCK_MONOTONIC);
+#endif
+
+ for (d = 0; d < timestampCount; d++) {
+ switch (pTimestampInfos[d].timeDomain) {
+ case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
+ pTimestamps[d] = vk_clock_gettime(CLOCK_MONOTONIC);
+ max_clock_period = MAX2(max_clock_period, 1);
+ break;
+
+#ifdef CLOCK_MONOTONIC_RAW
+ case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
+ pTimestamps[d] = begin;
+ break;
+#endif
+ default:
+ pTimestamps[d] = 0;
+ break;
+ }
+ }
+
+#ifdef CLOCK_MONOTONIC_RAW
+ end = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
+#else
+ end = vk_clock_gettime(CLOCK_MONOTONIC);
+#endif
+
+ *pMaxDeviation = vk_time_max_deviation(begin, end, max_clock_period);
+
+ return VK_SUCCESS;
+}
+
+/* We need to implement this ourselves so we return the command-enqueue
+ * ("fake") entrypoints from the exposed dispatch table; the real entrypoints
+ * are only reached when the recorded commands are actually executed. */
+static PFN_vkVoidFunction
+kk_device_get_proc_addr(const struct kk_device *device, const char *name)
+{
+ if (device == NULL || name == NULL)
+ return NULL;
+
+ struct vk_instance *instance = device->vk.physical->instance;
+ return vk_device_dispatch_table_get_if_supported(
+ &device->exposed_dispatch_table, name, instance->app_info.api_version,
+ &instance->enabled_extensions, &device->vk.enabled_extensions);
+}
+
+VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+kk_GetDeviceProcAddr(VkDevice _device, const char *pName)
+{
+ VK_FROM_HANDLE(kk_device, device, _device);
+ return kk_device_get_proc_addr(device, pName);
+}
+
+void
+kk_device_add_user_heap(struct kk_device *dev, mtl_heap *heap)
+{
+ simple_mtx_lock(&dev->user_heap_cache.mutex);
+ util_dynarray_append(&dev->user_heap_cache.handles, mtl_heap *, heap);
+ dev->user_heap_cache.hash += 1u;
+ simple_mtx_unlock(&dev->user_heap_cache.mutex);
+}
+
+void
+kk_device_remove_user_heap(struct kk_device *dev, mtl_heap *heap)
+{
+ simple_mtx_lock(&dev->user_heap_cache.mutex);
+ util_dynarray_delete_unordered(&dev->user_heap_cache.handles, mtl_heap *,
+ heap);
+ simple_mtx_unlock(&dev->user_heap_cache.mutex);
+}
diff --git a/src/kosmickrisp/vulkan/kk_device.h b/src/kosmickrisp/vulkan/kk_device.h
new file mode 100644
index 00000000000..50e05e934c9
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_device.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_DEVICE_H
+#define KK_DEVICE_H 1
+
+#include "kk_private.h"
+
+#include "kk_query_table.h"
+#include "kk_queue.h"
+
+#include "kosmickrisp/bridge/mtl_types.h"
+
+#include "util/u_dynarray.h"
+
+#include "vk_device.h"
+#include "vk_meta.h"
+#include "vk_queue.h"
+
+struct kk_bo;
+struct kk_physical_device;
+struct vk_pipeline_cache;
+
+enum kk_device_lib_pipeline {
+ KK_LIB_IMM_WRITE = 0,
+ KK_LIB_COPY_QUERY,
+ KK_LIB_TRIANGLE_FAN,
+ KK_LIB_COUNT,
+};
+
+struct kk_user_heap_cache {
+ simple_mtx_t mutex;
+ uint32_t hash;
+ struct util_dynarray handles;
+};
+
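+/* Packed sampler creation parameters. Used as the hash key for the device
+ * sampler heap, so it must uniquely identify a Metal sampler state. */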
+struct mtl_sampler_packed {
+ enum mtl_sampler_address_mode mode_u;
+ enum mtl_sampler_address_mode mode_v;
+ enum mtl_sampler_address_mode mode_w;
+ enum mtl_sampler_border_color border_color;
+
+ enum mtl_sampler_min_mag_filter min_filter;
+ enum mtl_sampler_min_mag_filter mag_filter;
+ enum mtl_sampler_mip_filter mip_filter;
+
+ enum mtl_compare_function compare_func;
+ float min_lod;
+ float max_lod;
+ uint32_t max_anisotropy;
+ bool normalized_coordinates;
+};
+
+struct kk_rc_sampler {
+ struct mtl_sampler_packed key;
+
+ mtl_sampler *handle;
+
+ /* Reference count for this hardware sampler, protected by the heap mutex */
+ uint16_t refcount;
+
+ /* Index of this hardware sampler in the hardware sampler heap */
+ uint16_t index;
+};
+
+struct kk_sampler_heap {
+ simple_mtx_t lock;
+
+ struct kk_query_table table;
+
+   /* Map of mtl_sampler_packed to kk_rc_sampler */
+ struct hash_table *ht;
+};
+
+struct kk_device {
+ struct vk_device vk;
+
+ mtl_device *mtl_handle;
+
+ /* Dispatch table exposed to the user. Required since we need to record all
+ * commands due to Metal limitations */
+ struct vk_device_dispatch_table exposed_dispatch_table;
+
+ struct kk_bo *null_descriptor;
+
+ struct kk_sampler_heap samplers;
+ struct kk_query_table occlusion_queries;
+
+ /* Track all heaps the user allocated so we can set them all as resident when
+ * recording as required by Metal. */
+ struct kk_user_heap_cache user_heap_cache;
+
+ mtl_compute_pipeline_state *lib_pipelines[KK_LIB_COUNT];
+
+ struct kk_queue queue;
+
+ struct vk_meta_device meta;
+
+ bool gpu_capture_enabled;
+};
+
+VK_DEFINE_HANDLE_CASTS(kk_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
+
+static inline mtl_compute_pipeline_state *
+kk_device_lib_pipeline(const struct kk_device *dev,
+ enum kk_device_lib_pipeline pipeline)
+{
+ assert(pipeline < KK_LIB_COUNT);
+ return dev->lib_pipelines[pipeline];
+}
+
+static inline struct kk_physical_device *
+kk_device_physical(const struct kk_device *dev)
+{
+ return (struct kk_physical_device *)dev->vk.physical;
+}
+
+VkResult kk_device_init_meta(struct kk_device *dev);
+void kk_device_finish_meta(struct kk_device *dev);
+VkResult kk_device_init_lib(struct kk_device *dev);
+void kk_device_finish_lib(struct kk_device *dev);
+void kk_device_add_user_heap(struct kk_device *dev, mtl_heap *heap);
+void kk_device_remove_user_heap(struct kk_device *dev, mtl_heap *heap);
+
+/* Required to create a sampler */
+mtl_sampler *kk_sampler_create(struct kk_device *dev,
+ const struct mtl_sampler_packed *packed);
+VkResult kk_sampler_heap_add(struct kk_device *dev,
+ struct mtl_sampler_packed desc,
+ struct kk_rc_sampler **out);
+void kk_sampler_heap_remove(struct kk_device *dev, struct kk_rc_sampler *rc);
+
+#endif // KK_DEVICE_H
diff --git a/src/kosmickrisp/vulkan/kk_device_lib.c b/src/kosmickrisp/vulkan/kk_device_lib.c
new file mode 100644
index 00000000000..9d2afbf9e45
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_device_lib.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_device.h"
+
+#include "kk_shader.h"
+
+#include "kkcl.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+
+static nir_def *
+load_struct_var(nir_builder *b, nir_variable *var, uint32_t field)
+{
+ nir_deref_instr *deref =
+ nir_build_deref_struct(b, nir_build_deref_var(b, var), field);
+ return nir_load_deref(b, deref);
+}
+
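+/* The builders below create NIR compute shaders with a 1x1x1 workgroup size
+ * that forward their push-constant block to the corresponding libkk_*
+ * entrypoint from the device library in cl/. */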
+static nir_shader *
+create_imm_write_shader()
+{
+ nir_builder build = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
+ "kk-meta-imm-write-u64");
+ nir_builder *b = &build;
+
+ struct glsl_struct_field push_fields[] = {
+ {.type = glsl_uint64_t_type(), .name = "buffer_address", .offset = 0},
+ };
+ const struct glsl_type *push_iface_type = glsl_interface_type(
+ push_fields, ARRAY_SIZE(push_fields), GLSL_INTERFACE_PACKING_STD140,
+ false /* row_major */, "push");
+ nir_variable *push = nir_variable_create(b->shader, nir_var_mem_push_const,
+ push_iface_type, "push");
+
+ b->shader->info.workgroup_size[0] = 1;
+ b->shader->info.workgroup_size[1] = 1;
+ b->shader->info.workgroup_size[2] = 1;
+
+ libkk_write_u64(b, load_struct_var(b, push, 0));
+
+ return build.shader;
+}
+
+static nir_shader *
+create_copy_query_shader()
+{
+ nir_builder build = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
+ "kk-meta-copy-queries");
+ nir_builder *b = &build;
+
+ struct glsl_struct_field push_fields[] = {
+ {.type = glsl_uint64_t_type(), .name = "availability", .offset = 0},
+ {.type = glsl_uint64_t_type(), .name = "results", .offset = 8},
+ {.type = glsl_uint64_t_type(), .name = "indices", .offset = 16},
+ {.type = glsl_uint64_t_type(), .name = "dst_addr", .offset = 24},
+ {.type = glsl_uint64_t_type(), .name = "dst_stride", .offset = 32},
+ {.type = glsl_uint_type(), .name = "first_query", .offset = 40},
+ {.type = glsl_uint_type(), .name = "flags", .offset = 44},
+ {.type = glsl_uint16_t_type(), .name = "reports_per_query", .offset = 48},
+ };
+ /* TODO_KOSMICKRISP Don't use push constants and directly bind the buffer to
+ * the binding index. This requires compiler work first to remove the
+ * hard-coded buffer0 value. Same applies to other creation functions.
+ */
+ const struct glsl_type *push_iface_type = glsl_interface_type(
+ push_fields, ARRAY_SIZE(push_fields), GLSL_INTERFACE_PACKING_STD140,
+ false /* row_major */, "push");
+ nir_variable *push = nir_variable_create(b->shader, nir_var_mem_push_const,
+ push_iface_type, "push");
+
+ b->shader->info.workgroup_size[0] = 1;
+ b->shader->info.workgroup_size[1] = 1;
+ b->shader->info.workgroup_size[2] = 1;
+
+ libkk_copy_queries(b, load_struct_var(b, push, 0),
+ load_struct_var(b, push, 1), load_struct_var(b, push, 2),
+ load_struct_var(b, push, 3), load_struct_var(b, push, 4),
+ load_struct_var(b, push, 5), load_struct_var(b, push, 6),
+ load_struct_var(b, push, 7));
+
+ return build.shader;
+}
+
+static nir_shader *
+create_triangle_fan_shader()
+{
+ nir_builder build = nir_builder_init_simple_shader(
+      MESA_SHADER_COMPUTE, NULL, "kk-device-unroll-geometry-and-restart");
+ nir_builder *b = &build;
+
+ struct glsl_struct_field push_fields[] = {
+ {.type = glsl_uint64_t_type(), .name = "index_buffer", .offset = 0},
+ {.type = glsl_uint64_t_type(), .name = "out_ptr", .offset = 8},
+ {.type = glsl_uint64_t_type(), .name = "indirect_in", .offset = 16},
+ {.type = glsl_uint64_t_type(), .name = "indirect_out", .offset = 24},
+ {.type = glsl_uint_type(), .name = "restart_index", .offset = 32},
+ {.type = glsl_uint_type(), .name = "index_buffer_size_el", .offset = 36},
+ {.type = glsl_uint_type(), .name = "in_el_size_B,", .offset = 40},
+ {.type = glsl_uint_type(), .name = "out_el_size_B,", .offset = 44},
+ {.type = glsl_uint_type(), .name = "flatshade_first", .offset = 48},
+ {.type = glsl_uint_type(), .name = "mode", .offset = 52},
+ };
+ const struct glsl_type *push_iface_type = glsl_interface_type(
+ push_fields, ARRAY_SIZE(push_fields), GLSL_INTERFACE_PACKING_STD140,
+ false /* row_major */, "push");
+ nir_variable *push = nir_variable_create(b->shader, nir_var_mem_push_const,
+ push_iface_type, "push");
+
+ b->shader->info.workgroup_size[0] = 1;
+ b->shader->info.workgroup_size[1] = 1;
+ b->shader->info.workgroup_size[2] = 1;
+
+ libkk_unroll_geometry_and_restart(
+ b, load_struct_var(b, push, 0), load_struct_var(b, push, 1),
+ load_struct_var(b, push, 2), load_struct_var(b, push, 3),
+ load_struct_var(b, push, 4), load_struct_var(b, push, 5),
+ load_struct_var(b, push, 6), load_struct_var(b, push, 7),
+ load_struct_var(b, push, 8), load_struct_var(b, push, 9));
+
+ return build.shader;
+}
+
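+/* Shader builder for each device-lib pipeline slot; the ndx field documents
+ * which enum kk_device_lib_pipeline entry the row is expected to fill. */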
+static struct {
+ enum kk_device_lib_pipeline ndx;
+ nir_shader *(*create_shader_fn)();
+} lib_shaders[KK_LIB_COUNT] = {
+ {KK_LIB_IMM_WRITE, create_imm_write_shader},
+ {KK_LIB_COPY_QUERY, create_copy_query_shader},
+ {KK_LIB_TRIANGLE_FAN, create_triangle_fan_shader},
+};
+static_assert(ARRAY_SIZE(lib_shaders) == KK_LIB_COUNT,
+ "Device lib shader count and created shader count mismatch");
+
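+/* Compiles every device-lib shader to MSL, wraps it in a Metal compute
+ * pipeline state and stores the result in dev->lib_pipelines. */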
+VkResult
+kk_device_init_lib(struct kk_device *dev)
+{
+ VkResult result = VK_SUCCESS;
+ uint32_t i = 0u;
+ for (; i < KK_LIB_COUNT; ++i) {
+ nir_shader *s = lib_shaders[i].create_shader_fn();
+ if (!s)
+ goto fail;
+
+ struct kk_shader *shader = NULL;
+ result = kk_compile_nir_shader(dev, s, &dev->vk.alloc, &shader);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+      mtl_library *library = mtl_new_library(dev->mtl_handle, shader->msl_code);
+      if (library == NULL) {
+         result = VK_ERROR_OUT_OF_HOST_MEMORY;
+         shader->vk.ops->destroy(&dev->vk, &shader->vk, &dev->vk.alloc);
+         goto fail;
+      }
+
+ uint32_t local_size_threads = shader->info.cs.local_size.x *
+ shader->info.cs.local_size.y *
+ shader->info.cs.local_size.z;
+ mtl_function *function =
+ mtl_new_function_with_name(library, shader->entrypoint_name);
+ dev->lib_pipelines[i] = mtl_new_compute_pipeline_state(
+ dev->mtl_handle, function, local_size_threads);
+ mtl_release(function);
+ mtl_release(library);
+
+      /* We no longer need the shader, although keeping it alive for its info
+       * might be useful in the future. */
+ shader->vk.ops->destroy(&dev->vk, &shader->vk, &dev->vk.alloc);
+
+      if (!dev->lib_pipelines[i]) {
+         result = VK_ERROR_OUT_OF_HOST_MEMORY;
+         goto fail;
+      }
+ }
+
+ return result;
+
+fail:
+ for (uint32_t j = 0u; j < i; ++j)
+ mtl_release(dev->lib_pipelines[j]);
+ return vk_error(dev, result);
+}
+
+void
+kk_device_finish_lib(struct kk_device *dev)
+{
+ for (uint32_t i = 0; i < KK_LIB_COUNT; ++i)
+ mtl_release(dev->lib_pipelines[i]);
+}
diff --git a/src/kosmickrisp/vulkan/kk_device_memory.c b/src/kosmickrisp/vulkan/kk_device_memory.c
new file mode 100644
index 00000000000..e020aa979f4
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_device_memory.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_device_memory.h"
+
+#include "kk_device.h"
+#include "kk_entrypoints.h"
+#include "kk_physical_device.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+
+#include "vulkan/vulkan_metal.h"
+
+#include "util/u_atomic.h"
+#include "util/u_memory.h"
+
+#include <inttypes.h>
+#include <sys/mman.h>
+
+/* Supports mtlheap only */
+const VkExternalMemoryProperties kk_mtlheap_mem_props = {
+ .externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
+ VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT,
+ .exportFromImportedHandleTypes =
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLHEAP_BIT_EXT,
+ .compatibleHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLHEAP_BIT_EXT,
+};
+
+#ifdef VK_USE_PLATFORM_METAL_EXT
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_GetMemoryMetalHandlePropertiesEXT(
+ VkDevice device, VkExternalMemoryHandleTypeFlagBits handleType,
+ const void *pHandle,
+ VkMemoryMetalHandlePropertiesEXT *pMemoryMetalHandleProperties)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ struct kk_physical_device *pdev = kk_device_physical(dev);
+
+ /* We only support heaps since that's the backing for all our memory and
+ * simplifies implementation */
+ switch (handleType) {
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLHEAP_BIT_EXT:
+ break;
+ default:
+ return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ }
+ pMemoryMetalHandleProperties->memoryTypeBits =
+ BITFIELD_MASK(pdev->mem_type_count);
+
+ return VK_SUCCESS;
+}
+#endif /* VK_USE_PLATFORM_METAL_EXT */
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_AllocateMemory(VkDevice device, const VkMemoryAllocateInfo *pAllocateInfo,
+ const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ struct kk_physical_device *pdev = kk_device_physical(dev);
+ struct kk_device_memory *mem;
+ VkResult result = VK_SUCCESS;
+ const VkImportMemoryMetalHandleInfoEXT *metal_info = vk_find_struct_const(
+ pAllocateInfo->pNext, IMPORT_MEMORY_METAL_HANDLE_INFO_EXT);
+ const VkMemoryType *type = &pdev->mem_types[pAllocateInfo->memoryTypeIndex];
+
+ // TODO_KOSMICKRISP Do the actual memory allocation with alignment requirements
+ uint32_t alignment = (1ULL << 12);
+
+ const uint64_t aligned_size =
+ align64(pAllocateInfo->allocationSize, alignment);
+
+ mem = vk_device_memory_create(&dev->vk, pAllocateInfo, pAllocator,
+ sizeof(*mem));
+ if (!mem)
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ if (metal_info && metal_info->handleType) {
+ /* We only support heaps since that's the backing for all our memory and
+ * simplifies implementation */
+ assert(metal_info->handleType ==
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLHEAP_BIT_EXT);
+ mem->bo = CALLOC_STRUCT(kk_bo);
+ if (!mem->bo) {
+ result = vk_errorf(&dev->vk.base, VK_ERROR_OUT_OF_DEVICE_MEMORY, "%m");
+ goto fail_alloc;
+ }
+ mem->bo->mtl_handle = mtl_retain(metal_info->handle);
+ mem->bo->map =
+ mtl_new_buffer_with_length(mem->bo->mtl_handle, mem->vk.size, 0u);
+ mem->bo->gpu = mtl_buffer_get_gpu_address(mem->bo->map);
+ mem->bo->cpu = mtl_get_contents(mem->bo->map);
+ mem->bo->size_B = mtl_heap_get_size(mem->bo->mtl_handle);
+ } else {
+ result =
+ kk_alloc_bo(dev, &dev->vk.base, aligned_size, alignment, &mem->bo);
+ if (result != VK_SUCCESS)
+ goto fail_alloc;
+ }
+
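+   /* Account the allocation against the heap budget and register the backing
+    * MTLHeap so it is made resident when recording command buffers. */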
+ struct kk_memory_heap *heap = &pdev->mem_heaps[type->heapIndex];
+ p_atomic_add(&heap->used, mem->bo->size_B);
+
+ kk_device_add_user_heap(dev, mem->bo->mtl_handle);
+
+ *pMem = kk_device_memory_to_handle(mem);
+
+ return VK_SUCCESS;
+
+fail_alloc:
+ vk_device_memory_destroy(&dev->vk, pAllocator, &mem->vk);
+ return result;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_FreeMemory(VkDevice device, VkDeviceMemory _mem,
+ const VkAllocationCallbacks *pAllocator)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VK_FROM_HANDLE(kk_device_memory, mem, _mem);
+ struct kk_physical_device *pdev = kk_device_physical(dev);
+
+ if (!mem)
+ return;
+
+ kk_device_remove_user_heap(dev, mem->bo->mtl_handle);
+
+ const VkMemoryType *type = &pdev->mem_types[mem->vk.memory_type_index];
+ struct kk_memory_heap *heap = &pdev->mem_heaps[type->heapIndex];
+ p_atomic_add(&heap->used, -((int64_t)mem->bo->size_B));
+
+ kk_destroy_bo(dev, mem->bo);
+
+ vk_device_memory_destroy(&dev->vk, pAllocator, &mem->vk);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_MapMemory2KHR(VkDevice device, const VkMemoryMapInfoKHR *pMemoryMapInfo,
+ void **ppData)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VK_FROM_HANDLE(kk_device_memory, mem, pMemoryMapInfo->memory);
+ VkResult result = VK_SUCCESS;
+
+ if (mem == NULL) {
+ *ppData = NULL;
+ return VK_SUCCESS;
+ }
+
+ const VkDeviceSize offset = pMemoryMapInfo->offset;
+ const VkDeviceSize size = vk_device_memory_range(
+ &mem->vk, pMemoryMapInfo->offset, pMemoryMapInfo->size);
+
+ /* From the Vulkan spec version 1.0.32 docs for MapMemory:
+ *
+    *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
+ * * If size is not equal to VK_WHOLE_SIZE, size must be less than or
+ * equal to the size of the memory minus offset
+ */
+ assert(size > 0);
+ assert(offset + size <= mem->bo->size_B);
+
+ if (size != (size_t)size) {
+ return vk_errorf(dev, VK_ERROR_MEMORY_MAP_FAILED,
+ "requested size 0x%" PRIx64 " does not fit in %u bits",
+ size, (unsigned)(sizeof(size_t) * 8));
+ }
+
+ /* From the Vulkan 1.2.194 spec:
+ *
+ * "memory must not be currently host mapped"
+ */
+ if (mem->map != NULL) {
+ return vk_errorf(dev, VK_ERROR_MEMORY_MAP_FAILED,
+ "Memory object already mapped.");
+ }
+
+   // TODO_KOSMICKRISP Use mmap here so we can support VK_EXT_map_memory_placed
+ mem->map = mem->bo->cpu;
+
+ *ppData = mem->map + offset;
+
+ return result;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_UnmapMemory2KHR(VkDevice device,
+ const VkMemoryUnmapInfoKHR *pMemoryUnmapInfo)
+{
+ VK_FROM_HANDLE(kk_device_memory, mem, pMemoryUnmapInfo->memory);
+
+ if (mem == NULL)
+ return VK_SUCCESS;
+
+   // TODO_KOSMICKRISP Use munmap here so we can support
+   // VK_EXT_map_memory_placed
+ mem->map = NULL;
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_FlushMappedMemoryRanges(VkDevice device, uint32_t memoryRangeCount,
+ const VkMappedMemoryRange *pMemoryRanges)
+{
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_InvalidateMappedMemoryRanges(VkDevice device, uint32_t memoryRangeCount,
+ const VkMappedMemoryRange *pMemoryRanges)
+{
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory _mem,
+ VkDeviceSize *pCommittedMemoryInBytes)
+{
+ VK_FROM_HANDLE(kk_device_memory, mem, _mem);
+
+ *pCommittedMemoryInBytes = mem->bo->size_B;
+}
+
+#ifdef VK_USE_PLATFORM_METAL_EXT
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_GetMemoryMetalHandleEXT(
+ VkDevice device, const VkMemoryGetMetalHandleInfoEXT *pGetMetalHandleInfo,
+ void **pHandle)
+{
+ /* We only support heaps since that's the backing for all our memory and
+ * simplifies implementation */
+ assert(pGetMetalHandleInfo->handleType ==
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLHEAP_BIT_EXT);
+ VK_FROM_HANDLE(kk_device_memory, mem, pGetMetalHandleInfo->memory);
+
+ /* From the Vulkan spec of vkGetMemoryMetalHandleEXT:
+ *
+ * "Unless the app retains the handle object returned by the call,
+ * the lifespan will be the same as the associated VkDeviceMemory"
+ */
+ *pHandle = mem->bo->mtl_handle;
+ return VK_SUCCESS;
+}
+#endif /* VK_USE_PLATFORM_METAL_EXT */
+
+VKAPI_ATTR uint64_t VKAPI_CALL
+kk_GetDeviceMemoryOpaqueCaptureAddress(
+ UNUSED VkDevice device, const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo)
+{
+ VK_FROM_HANDLE(kk_device_memory, mem, pInfo->memory);
+
+ return mem->bo->gpu;
+}
diff --git a/src/kosmickrisp/vulkan/kk_device_memory.h b/src/kosmickrisp/vulkan/kk_device_memory.h
new file mode 100644
index 00000000000..44ced28aa67
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_device_memory.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_MEMORY_H
+#define KK_MEMORY_H 1
+
+#include "kk_private.h"
+
+#include "kk_bo.h"
+
+#include "vk_device_memory.h"
+
+#include "util/list.h"
+
+struct kk_device_memory {
+ struct vk_device_memory vk;
+ struct kk_bo *bo;
+ void *map;
+};
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(kk_device_memory, vk.base, VkDeviceMemory,
+ VK_OBJECT_TYPE_DEVICE_MEMORY)
+
+extern const VkExternalMemoryProperties kk_mtlheap_mem_props;
+
+#endif // KK_MEMORY_H
diff --git a/src/kosmickrisp/vulkan/kk_encoder.c b/src/kosmickrisp/vulkan/kk_encoder.c
new file mode 100644
index 00000000000..ca083674939
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_encoder.c
@@ -0,0 +1,480 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_encoder.h"
+
+#include "kk_bo.h"
+#include "kk_cmd_buffer.h"
+#include "kk_queue.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+#include "kosmickrisp/bridge/vk_to_mtl_map.h"
+
+#include "cl/kk_query.h"
+
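+/* Creates the Metal command buffer backing one internal encoder stream and
+ * resets its per-pass fence tracking. */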
+static void
+kk_encoder_start_internal(struct kk_encoder_internal *encoder,
+ mtl_device *device, mtl_command_queue *queue)
+{
+ encoder->cmd_buffer = mtl_new_command_buffer(queue);
+ encoder->last_used = KK_ENC_NONE;
+ util_dynarray_init(&encoder->fences, NULL);
+}
+
+VkResult
+kk_encoder_init(mtl_device *device, struct kk_queue *queue,
+ struct kk_encoder **encoder)
+{
+ assert(encoder && device && queue);
+ struct kk_encoder *enc = (struct kk_encoder *)malloc(sizeof(*enc));
+ if (!enc)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ memset(enc, 0u, sizeof(*enc));
+ enc->dev = device;
+ kk_encoder_start_internal(&enc->main, device, queue->main.mtl_handle);
+ kk_encoder_start_internal(&enc->pre_gfx, device, queue->pre_gfx.mtl_handle);
+ enc->event = mtl_new_event(device);
+ util_dynarray_init(&enc->imm_writes, NULL);
+ util_dynarray_init(&enc->resident_buffers, NULL);
+ util_dynarray_init(&enc->copy_query_pool_result_infos, NULL);
+
+ *encoder = enc;
+ return VK_SUCCESS;
+}
+
+mtl_render_encoder *
+kk_encoder_start_render(struct kk_cmd_buffer *cmd,
+ mtl_render_pass_descriptor *descriptor,
+ uint32_t view_mask)
+{
+ struct kk_encoder *encoder = cmd->encoder;
+ /* We must not already be in a render encoder */
+ assert(encoder->main.last_used != KK_ENC_RENDER ||
+ encoder->main.encoder == NULL);
+ if (encoder->main.last_used != KK_ENC_RENDER) {
+ kk_encoder_signal_fence_and_end(cmd);
+
+ /* Before we start any render operation we need to ensure we have the
+       * required signals to insert pre_gfx execution before the render encoder
+ * in case we need to insert commands to massage input data for things
+ * like triangle fans. For this, we signal the value pre_gfx will wait on,
+ * and we wait on the value pre_gfx will signal once completed.
+ */
+ encoder->signal_value_pre_gfx = encoder->event_value;
+ mtl_encode_signal_event(encoder->main.cmd_buffer, encoder->event,
+ ++encoder->event_value);
+ encoder->wait_value_pre_gfx = encoder->event_value;
+ mtl_encode_wait_for_event(encoder->main.cmd_buffer, encoder->event,
+ ++encoder->event_value);
+
+ encoder->main.encoder = mtl_new_render_command_encoder_with_descriptor(
+ encoder->main.cmd_buffer, descriptor);
+ if (encoder->main.wait_fence) {
+ mtl_render_wait_for_fence(
+ encoder->main.encoder,
+ util_dynarray_top(&encoder->main.fences, mtl_fence *));
+ encoder->main.wait_fence = false;
+ }
+
+ uint32_t layer_ids[KK_MAX_MULTIVIEW_VIEW_COUNT] = {};
+ uint32_t count = 0u;
+ u_foreach_bit(id, view_mask)
+ layer_ids[count++] = id;
+ if (view_mask == 0u) {
+ layer_ids[count++] = 0;
+ }
+ mtl_set_vertex_amplification_count(encoder->main.encoder, layer_ids,
+ count);
+ encoder->main.user_heap_hash = UINT32_MAX;
+
+ /* Bind read only data aka samplers' argument buffer. */
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+ mtl_set_vertex_buffer(encoder->main.encoder, dev->samplers.table.bo->map,
+ 0u, 1u);
+ mtl_set_fragment_buffer(encoder->main.encoder,
+ dev->samplers.table.bo->map, 0u, 1u);
+ }
+ encoder->main.last_used = KK_ENC_RENDER;
+ return encoder->main.encoder;
+}
+
+mtl_compute_encoder *
+kk_encoder_start_compute(struct kk_cmd_buffer *cmd)
+{
+ struct kk_encoder *encoder = cmd->encoder;
+ /* We must not already be in a render encoder */
+ assert(encoder->main.last_used != KK_ENC_RENDER ||
+ encoder->main.encoder == NULL);
+ struct kk_encoder_internal *enc = &encoder->main;
+ if (encoder->main.last_used != KK_ENC_COMPUTE) {
+ kk_encoder_signal_fence_and_end(cmd);
+ enc->encoder = mtl_new_compute_command_encoder(enc->cmd_buffer);
+ if (enc->wait_fence) {
+ mtl_compute_wait_for_fence(
+ enc->encoder, util_dynarray_top(&enc->fences, mtl_fence *));
+ enc->wait_fence = false;
+ }
+ enc->user_heap_hash = UINT32_MAX;
+
+ /* Bind read only data aka samplers' argument buffer. */
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+ mtl_compute_set_buffer(enc->encoder, dev->samplers.table.bo->map, 0u, 1u);
+ }
+ encoder->main.last_used = KK_ENC_COMPUTE;
+ return encoder->main.encoder;
+}
+
+mtl_compute_encoder *
+kk_encoder_start_blit(struct kk_cmd_buffer *cmd)
+{
+ struct kk_encoder *encoder = cmd->encoder;
+ /* We must not already be in a render encoder */
+ assert(encoder->main.last_used != KK_ENC_RENDER ||
+ encoder->main.encoder == NULL);
+ struct kk_encoder_internal *enc = &encoder->main;
+ if (encoder->main.last_used != KK_ENC_BLIT) {
+ kk_encoder_signal_fence_and_end(cmd);
+ enc->encoder = mtl_new_blit_command_encoder(enc->cmd_buffer);
+ if (enc->wait_fence) {
+ mtl_compute_wait_for_fence(
+ enc->encoder, util_dynarray_top(&enc->fences, mtl_fence *));
+ enc->wait_fence = false;
+ }
+ }
+ encoder->main.last_used = KK_ENC_BLIT;
+ return encoder->main.encoder;
+}
+
+void
+kk_encoder_end(struct kk_cmd_buffer *cmd)
+{
+ assert(cmd);
+
+ kk_encoder_signal_fence_and_end(cmd);
+
+ /* Let remaining render encoders run without waiting since we are done */
+ mtl_encode_signal_event(cmd->encoder->pre_gfx.cmd_buffer,
+ cmd->encoder->event, cmd->encoder->event_value);
+}
+
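+/* Push data for the KK_LIB_IMM_WRITE dispatch: GPU address of the staging
+ * buffer holding (address, value) pairs and the number of pairs to write. */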
+struct kk_imm_write_push {
+ uint64_t buffer_address;
+ uint32_t count;
+};
+
+void
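+/* Flushes work that was queued while recording: immediate (address, value)
+ * writes and vkCmdCopyQueryPoolResults requests are replayed with the
+ * device-lib compute pipelines after making the destination buffers resident.
+ */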
+upload_queue_writes(struct kk_cmd_buffer *cmd)
+{
+ struct kk_encoder *enc = cmd->encoder;
+
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+ uint32_t count = util_dynarray_num_elements(&enc->imm_writes, uint64_t) / 2u;
+ if (count != 0) {
+ mtl_compute_encoder *compute = kk_compute_encoder(cmd);
+ struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, enc->imm_writes.size, 8u);
+ /* kk_cmd_allocate_buffer sets the cmd buffer error so we can just exit */
+ if (!bo)
+ return;
+ memcpy(bo->cpu, enc->imm_writes.data, enc->imm_writes.size);
+ uint32_t buffer_count =
+ util_dynarray_num_elements(&enc->resident_buffers, mtl_buffer *);
+ mtl_compute_use_resource(compute, bo->map, MTL_RESOURCE_USAGE_READ);
+ mtl_compute_use_resources(
+ compute, enc->resident_buffers.data, buffer_count,
+ MTL_RESOURCE_USAGE_READ | MTL_RESOURCE_USAGE_WRITE);
+ struct kk_imm_write_push push_data = {
+ .buffer_address = bo->gpu,
+ .count = count,
+ };
+ kk_cmd_dispatch_pipeline(cmd, compute,
+ kk_device_lib_pipeline(dev, KK_LIB_IMM_WRITE),
+ &push_data, sizeof(push_data), count, 1, 1);
+ enc->resident_buffers.size = 0u;
+ enc->imm_writes.size = 0u;
+ }
+
+ count = util_dynarray_num_elements(&enc->copy_query_pool_result_infos,
+ struct kk_copy_query_pool_results_info);
+ if (count != 0u) {
+ mtl_compute_encoder *compute = kk_compute_encoder(cmd);
+ uint32_t buffer_count =
+ util_dynarray_num_elements(&enc->resident_buffers, mtl_buffer *);
+ mtl_compute_use_resources(
+ compute, enc->resident_buffers.data, buffer_count,
+ MTL_RESOURCE_USAGE_READ | MTL_RESOURCE_USAGE_WRITE);
+
+ for (uint32_t i = 0u; i < count; ++i) {
+ struct kk_copy_query_pool_results_info *push_data =
+ util_dynarray_element(&enc->copy_query_pool_result_infos,
+ struct kk_copy_query_pool_results_info, i);
+
+ kk_cmd_dispatch_pipeline(
+ cmd, compute, kk_device_lib_pipeline(dev, KK_LIB_COPY_QUERY),
+ push_data, sizeof(*push_data), push_data->query_count, 1, 1);
+ }
+ enc->resident_buffers.size = 0u;
+ enc->copy_query_pool_result_infos.size = 0u;
+ }
+
+   /* All immediate writes done, reset encoder */
+ kk_encoder_signal_fence_and_end(cmd);
+}
+
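+/* Ends the pre_gfx and main encoders that are currently open. The main
+ * encoder updates a freshly created fence before ending so the next pass can
+ * wait on it, and pre_gfx signals the event value the render encoder waits
+ * on. */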
+void
+kk_encoder_signal_fence_and_end(struct kk_cmd_buffer *cmd)
+{
+ struct kk_encoder *encoder = cmd->encoder;
+ /* End pre_gfx */
+ if (encoder->pre_gfx.encoder) {
+ mtl_end_encoding(encoder->pre_gfx.encoder);
+ mtl_release(encoder->pre_gfx.encoder);
+ encoder->pre_gfx.encoder = NULL;
+
+ /* We can start rendering once all pre-graphics work is done */
+ mtl_encode_signal_event(encoder->pre_gfx.cmd_buffer, encoder->event,
+ encoder->event_value);
+ }
+
+ assert(encoder);
+ enum kk_encoder_type type = encoder->main.last_used;
+ struct kk_encoder_internal *enc = kk_encoder_get_internal(encoder, type);
+ if (!enc || !enc->encoder)
+ return;
+
+ mtl_fence *fence = mtl_new_fence(encoder->dev);
+ switch (type) {
+ case KK_ENC_RENDER:
+ mtl_render_update_fence(enc->encoder, fence);
+ break;
+ case KK_ENC_COMPUTE:
+ mtl_compute_update_fence(enc->encoder, fence);
+ break;
+ case KK_ENC_BLIT:
+ mtl_blit_update_fence(enc->encoder, fence);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ mtl_end_encoding(enc->encoder);
+ mtl_release(enc->encoder);
+ enc->encoder = NULL;
+ enc->last_used = KK_ENC_NONE;
+ enc->wait_fence = true;
+ util_dynarray_append(&enc->fences, mtl_fence *, fence);
+
+ if (cmd->drawable) {
+ mtl_present_drawable(enc->cmd_buffer, cmd->drawable);
+ cmd->drawable = NULL;
+ }
+ upload_queue_writes(cmd);
+}
+
+static void
+kk_post_execution_release_internal(struct kk_encoder_internal *encoder)
+{
+ mtl_release(encoder->cmd_buffer);
+ util_dynarray_foreach(&encoder->fences, mtl_fence *, fence)
+ mtl_release(*fence);
+ util_dynarray_fini(&encoder->fences);
+}
+
+static void
+kk_post_execution_release(void *data)
+{
+ struct kk_encoder *encoder = data;
+ kk_post_execution_release_internal(&encoder->main);
+ kk_post_execution_release_internal(&encoder->pre_gfx);
+ mtl_release(encoder->event);
+ util_dynarray_fini(&encoder->imm_writes);
+ util_dynarray_fini(&encoder->resident_buffers);
+ util_dynarray_fini(&encoder->copy_query_pool_result_infos);
+ free(encoder);
+}
+
+void
+kk_encoder_submit(struct kk_encoder *encoder)
+{
+ assert(encoder);
+
+ mtl_add_completed_handler(encoder->main.cmd_buffer,
+ kk_post_execution_release, encoder);
+
+ mtl_command_buffer_commit(encoder->pre_gfx.cmd_buffer);
+ mtl_command_buffer_commit(encoder->main.cmd_buffer);
+}
+
+mtl_render_encoder *
+kk_render_encoder(struct kk_cmd_buffer *cmd)
+{
+ struct kk_encoder *encoder = cmd->encoder;
+ /* Render encoders are created at vkBeginRendering only */
+ assert(encoder->main.last_used == KK_ENC_RENDER && encoder->main.encoder);
+ return (mtl_render_encoder *)encoder->main.encoder;
+}
+
+mtl_compute_encoder *
+kk_compute_encoder(struct kk_cmd_buffer *cmd)
+{
+ struct kk_encoder *encoder = cmd->encoder;
+ return encoder->main.last_used == KK_ENC_COMPUTE
+             ? (mtl_compute_encoder *)encoder->main.encoder
+ : kk_encoder_start_compute(cmd);
+}
+
+mtl_blit_encoder *
+kk_blit_encoder(struct kk_cmd_buffer *cmd)
+{
+ struct kk_encoder *encoder = cmd->encoder;
+ return encoder->main.last_used == KK_ENC_BLIT
+ ? (mtl_blit_encoder *)encoder->main.encoder
+ : kk_encoder_start_blit(cmd);
+}
+
+struct kk_encoder_internal *
+kk_encoder_get_internal(struct kk_encoder *encoder, enum kk_encoder_type type)
+{
+ switch (type) {
+ case KK_ENC_NONE:
+ assert(encoder->main.last_used == KK_ENC_NONE);
+ return NULL;
+ case KK_ENC_RENDER:
+ assert(encoder->main.last_used == KK_ENC_RENDER);
+ return &encoder->main;
+ case KK_ENC_COMPUTE:
+ assert(encoder->main.last_used == KK_ENC_COMPUTE);
+ return &encoder->main;
+ case KK_ENC_BLIT:
+ assert(encoder->main.last_used == KK_ENC_BLIT);
+ return &encoder->main;
+ default:
+ assert(0);
+ return NULL;
+ }
+}
+
+static mtl_compute_encoder *
+kk_encoder_pre_gfx_encoder(struct kk_encoder *encoder)
+{
+ if (!encoder->pre_gfx.encoder) {
+ /* Fast-forward all previous render encoders and wait for the last one */
+ mtl_encode_signal_event(encoder->pre_gfx.cmd_buffer, encoder->event,
+ encoder->signal_value_pre_gfx);
+ mtl_encode_wait_for_event(encoder->pre_gfx.cmd_buffer, encoder->event,
+ encoder->wait_value_pre_gfx);
+ encoder->pre_gfx.encoder =
+ mtl_new_compute_command_encoder(encoder->pre_gfx.cmd_buffer);
+ }
+
+ return encoder->pre_gfx.encoder;
+}
+
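+/* Push data for the KK_LIB_TRIANGLE_FAN dispatch; field order and sizes match
+ * the push block declared in create_triangle_fan_shader(). */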
+struct kk_triangle_fan_info {
+ uint64_t index_buffer;
+ uint64_t out_ptr;
+ uint64_t in_draw;
+ uint64_t out_draw;
+ uint32_t restart_index;
+ uint32_t index_buffer_size_el;
+ uint32_t in_el_size_B;
+ uint32_t out_el_size_B;
+ uint32_t flatshade_first;
+ uint32_t mode;
+};
+
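+/* Allocates a transient buffer holding a VkDrawIndexedIndirectCommand
+ * followed by the unrolled index list, fills it with the KK_LIB_TRIANGLE_FAN
+ * pipeline on the pre_gfx encoder and then issues the indirect indexed draw
+ * from it. */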
+static void
+kk_encoder_render_triangle_fan_common(struct kk_cmd_buffer *cmd,
+ struct kk_triangle_fan_info *info,
+ mtl_buffer *indirect, mtl_buffer *index,
+ uint32_t index_count,
+ uint32_t in_el_size_B,
+ uint32_t out_el_size_B)
+{
+ uint32_t index_buffer_size_B = index_count * out_el_size_B;
+ uint32_t buffer_size_B =
+ sizeof(VkDrawIndexedIndirectCommand) + index_buffer_size_B;
+ struct kk_bo *index_buffer =
+ kk_cmd_allocate_buffer(cmd, buffer_size_B, out_el_size_B);
+
+ if (!index_buffer)
+ return;
+
+ info->out_ptr = index_buffer->gpu + sizeof(VkDrawIndexedIndirectCommand);
+ info->out_draw = index_buffer->gpu;
+ info->in_el_size_B = in_el_size_B;
+ info->out_el_size_B = out_el_size_B;
+ info->flatshade_first = true;
+ mtl_compute_encoder *encoder = kk_encoder_pre_gfx_encoder(cmd->encoder);
+ if (index)
+ mtl_compute_use_resource(encoder, index, MTL_RESOURCE_USAGE_READ);
+ mtl_compute_use_resource(encoder, indirect, MTL_RESOURCE_USAGE_READ);
+ mtl_compute_use_resource(encoder, index_buffer->map,
+ MTL_RESOURCE_USAGE_WRITE);
+
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+ kk_cmd_dispatch_pipeline(cmd, encoder,
+ kk_device_lib_pipeline(dev, KK_LIB_TRIANGLE_FAN),
+ info, sizeof(*info), 1u, 1u, 1u);
+
+ enum mtl_index_type index_type =
+ index_size_in_bytes_to_mtl_index_type(out_el_size_B);
+ mtl_render_encoder *enc = kk_render_encoder(cmd);
+ mtl_draw_indexed_primitives_indirect(
+ enc, cmd->state.gfx.primitive_type, index_type, index_buffer->map,
+ sizeof(VkDrawIndexedIndirectCommand), index_buffer->map, 0u);
+}
+
+void
+kk_encoder_render_triangle_fan_indirect(struct kk_cmd_buffer *cmd,
+ mtl_buffer *indirect, uint64_t offset)
+{
+ enum mesa_prim mode = cmd->state.gfx.prim;
+ uint32_t decomposed_index_count =
+ u_decomposed_prims_for_vertices(mode, cmd->state.gfx.vb.max_vertices) *
+ mesa_vertices_per_prim(mode);
+ uint32_t el_size_B = decomposed_index_count < UINT16_MAX ? 2u : 4u;
+ struct kk_triangle_fan_info info = {
+ .in_draw = mtl_buffer_get_gpu_address(indirect) + offset,
+ .restart_index = UINT32_MAX, /* No restart */
+ .mode = mode,
+ };
+ kk_encoder_render_triangle_fan_common(
+ cmd, &info, indirect, NULL, decomposed_index_count, el_size_B, el_size_B);
+}
+
+void
+kk_encoder_render_triangle_fan_indexed_indirect(struct kk_cmd_buffer *cmd,
+ mtl_buffer *indirect,
+ uint64_t offset,
+ bool increase_el_size)
+{
+ uint32_t el_size_B = cmd->state.gfx.index.bytes_per_index;
+
+ enum mesa_prim mode = cmd->state.gfx.prim;
+ uint32_t max_index_count =
+ (mtl_buffer_get_length(cmd->state.gfx.index.handle) -
+ cmd->state.gfx.index.offset) /
+ el_size_B;
+ uint32_t decomposed_index_count =
+ u_decomposed_prims_for_vertices(mode, max_index_count) *
+ mesa_vertices_per_prim(mode);
+
+ struct kk_triangle_fan_info info = {
+ .index_buffer = mtl_buffer_get_gpu_address(cmd->state.gfx.index.handle) +
+ cmd->state.gfx.index.offset,
+ .in_draw = mtl_buffer_get_gpu_address(indirect) + offset,
+ .restart_index =
+ increase_el_size ? UINT32_MAX : cmd->state.gfx.index.restart,
+ .index_buffer_size_el = max_index_count,
+ .mode = mode,
+ };
+ uint32_t out_el_size_B = increase_el_size ? sizeof(uint32_t) : el_size_B;
+ kk_encoder_render_triangle_fan_common(
+ cmd, &info, indirect, cmd->state.gfx.index.handle, decomposed_index_count,
+ el_size_B, out_el_size_B);
+}
diff --git a/src/kosmickrisp/vulkan/kk_encoder.h b/src/kosmickrisp/vulkan/kk_encoder.h
new file mode 100644
index 00000000000..5c45b87ecdd
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_encoder.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_ENCODER_H
+#define KK_ENCODER_H 1
+
+#include "kosmickrisp/bridge/mtl_types.h"
+
+#include "util/u_dynarray.h"
+
+#include "vulkan/vulkan.h"
+
+struct kk_queue;
+struct kk_cmd_buffer;
+
+enum kk_encoder_type {
+ KK_ENC_NONE = 0,
+ KK_ENC_RENDER = BITFIELD_BIT(0),
+ KK_ENC_COMPUTE = BITFIELD_BIT(1),
+ KK_ENC_BLIT = BITFIELD_BIT(2),
+ KK_ENC_ALL = (KK_ENC_RENDER | KK_ENC_COMPUTE | KK_ENC_BLIT),
+ KK_ENC_COUNT = 3u,
+};
+
+struct kk_encoder_internal {
+ mtl_command_buffer *cmd_buffer;
+ mtl_command_encoder *encoder;
+
+ /* Used to know if we need to make heaps resident again */
+ uint32_t user_heap_hash;
+
+   /* Need to track the last used encoder type so we can converge at
+    * submission */
+ enum kk_encoder_type last_used;
+
+ /* Used to synchronize between passes inside the same command buffer */
+ struct util_dynarray fences;
+ /* Tracks if we need to wait on the last fence present in fences at the start
+ * of the pass */
+ bool wait_fence;
+};
+
+struct kk_copy_query_pool_results_info {
+ uint64_t availability;
+ uint64_t results;
+ uint64_t indices;
+ uint64_t dst_addr;
+ uint64_t dst_stride;
+ uint32_t first_query;
+ VkQueryResultFlagBits flags;
+ uint16_t reports_per_query;
+ uint32_t query_count;
+};
+
+struct kk_encoder {
+ mtl_device *dev;
+ struct kk_encoder_internal main;
+   /* Compute-only stream for work required before graphics (pre_gfx) */
+ struct kk_encoder_internal pre_gfx;
+
+ /* Used to synchronize between main and pre_gfx encoders */
+ mtl_event *event;
+ uint64_t event_value;
+ /* Track what values pre_gfx must wait/signal before starting the encoding */
+ uint64_t wait_value_pre_gfx;
+ uint64_t signal_value_pre_gfx;
+
+   /* uint64_t pairs: the first element is the destination address, the
+    * second the value to write */
+ struct util_dynarray imm_writes;
+ /* mtl_buffers (destination buffers) so we can make them resident before the
+ * dispatch */
+ struct util_dynarray resident_buffers;
+   /* Array of kk_copy_query_pool_results_info structs */
+ struct util_dynarray copy_query_pool_result_infos;
+};
+
+/* Allocates encoder and initialises/creates all resources required to start
+ * recording commands into the multiple encoders */
+VkResult kk_encoder_init(mtl_device *device, struct kk_queue *queue,
+ struct kk_encoder **encoder);
+
+/* Submits all command buffers and releases encoder memory. Requires all command
+ * buffers in the encoder to be linked to the last one used so the post
+ * execution callback is called once all are done */
+void kk_encoder_submit(struct kk_encoder *encoder);
+
+mtl_render_encoder *
+kk_encoder_start_render(struct kk_cmd_buffer *cmd,
+ mtl_render_pass_descriptor *descriptor,
+ uint32_t view_mask);
+
+mtl_compute_encoder *kk_encoder_start_compute(struct kk_cmd_buffer *cmd);
+
+mtl_compute_encoder *kk_encoder_start_blit(struct kk_cmd_buffer *cmd);
+
+/* Ends encoding on all command buffers */
+void kk_encoder_end(struct kk_cmd_buffer *cmd);
+
+/* Creates a fence and signals it inside the encoder, then ends encoding */
+void kk_encoder_signal_fence_and_end(struct kk_cmd_buffer *cmd);
+
+mtl_render_encoder *kk_render_encoder(struct kk_cmd_buffer *cmd);
+
+mtl_compute_encoder *kk_compute_encoder(struct kk_cmd_buffer *cmd);
+
+mtl_blit_encoder *kk_blit_encoder(struct kk_cmd_buffer *cmd);
+
+struct kk_encoder_internal *kk_encoder_get_internal(struct kk_encoder *encoder,
+ enum kk_encoder_type type);
+
+void upload_queue_writes(struct kk_cmd_buffer *cmd);
+
+void kk_encoder_render_triangle_fan_indirect(struct kk_cmd_buffer *cmd,
+ mtl_buffer *indirect,
+ uint64_t offset);
+
+void kk_encoder_render_triangle_fan_indexed_indirect(struct kk_cmd_buffer *cmd,
+ mtl_buffer *indirect,
+ uint64_t offset,
+ bool increase_el_size);
+
+#endif /* KK_ENCODER_H */
diff --git a/src/kosmickrisp/vulkan/kk_event.c b/src/kosmickrisp/vulkan/kk_event.c
new file mode 100644
index 00000000000..3a16d19e49e
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_event.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_event.h"
+
+#include "kk_bo.h"
+#include "kk_cmd_buffer.h"
+#include "kk_device.h"
+#include "kk_encoder.h"
+#include "kk_entrypoints.h"
+
+#define KK_EVENT_MEM_SIZE sizeof(uint64_t)
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_CreateEvent(VkDevice device, const VkEventCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkEvent *pEvent)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ struct kk_event *event;
+ VkResult result = VK_SUCCESS;
+
+ event = vk_object_zalloc(&dev->vk, pAllocator, sizeof(*event),
+ VK_OBJECT_TYPE_EVENT);
+ if (!event)
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ /* TODO_KOSMICKRISP Bring back the heap. */
+ result = kk_alloc_bo(dev, &dev->vk.base, KK_EVENT_MEM_SIZE,
+ KK_EVENT_MEM_SIZE, &event->bo);
+ if (result != VK_SUCCESS) {
+ vk_object_free(&dev->vk, pAllocator, event);
+ return result;
+ }
+
+ event->status = event->bo->cpu;
+ event->addr = event->bo->gpu;
+ *event->status = VK_EVENT_RESET;
+
+ *pEvent = kk_event_to_handle(event);
+
+ return result;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_DestroyEvent(VkDevice device, VkEvent _event,
+ const VkAllocationCallbacks *pAllocator)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VK_FROM_HANDLE(kk_event, event, _event);
+
+ if (!event)
+ return;
+
+ kk_destroy_bo(dev, event->bo);
+
+ vk_object_free(&dev->vk, pAllocator, event);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_GetEventStatus(VkDevice device, VkEvent _event)
+{
+ VK_FROM_HANDLE(kk_event, event, _event);
+
+ return *event->status;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_SetEvent(VkDevice device, VkEvent _event)
+{
+ VK_FROM_HANDLE(kk_event, event, _event);
+
+ *event->status = VK_EVENT_SET;
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_ResetEvent(VkDevice device, VkEvent _event)
+{
+ VK_FROM_HANDLE(kk_event, event, _event);
+
+ *event->status = VK_EVENT_RESET;
+
+ return VK_SUCCESS;
+}
+
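+/* Event writes go through the deferred queue-write path. If an encoder was
+ * open we end it (which also flushes the queued writes), and if it was a
+ * render pass we restart it afterwards with the attachments reloaded. */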
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdSetEvent2(VkCommandBuffer commandBuffer, VkEvent _event,
+ const VkDependencyInfo *pDependencyInfo)
+{
+ VK_FROM_HANDLE(kk_event, event, _event);
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ enum kk_encoder_type last_used = cmd->encoder->main.last_used;
+ kk_cmd_write(cmd, event->bo->map, event->addr, VK_EVENT_SET);
+ if (last_used != KK_ENC_NONE)
+ kk_encoder_signal_fence_and_end(cmd);
+ else
+ upload_queue_writes(cmd);
+
+   /* If we were inside a render pass, restart it, loading the attachments */
+ if (last_used == KK_ENC_RENDER) {
+ struct kk_graphics_state *state = &cmd->state.gfx;
+ assert(state->render_pass_descriptor);
+ kk_encoder_start_render(cmd, state->render_pass_descriptor,
+ state->render.view_mask);
+ kk_cmd_buffer_dirty_all_gfx(cmd);
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdResetEvent2(VkCommandBuffer commandBuffer, VkEvent _event,
+ VkPipelineStageFlags2 stageMask)
+{
+ VK_FROM_HANDLE(kk_event, event, _event);
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ enum kk_encoder_type last_used = cmd->encoder->main.last_used;
+ kk_cmd_write(cmd, event->bo->map, event->addr, VK_EVENT_RESET);
+ if (last_used != KK_ENC_NONE)
+ kk_encoder_signal_fence_and_end(cmd);
+ else
+ upload_queue_writes(cmd);
+
+   /* If we were inside a render pass, restart it, loading the attachments */
+ if (last_used == KK_ENC_RENDER) {
+ struct kk_graphics_state *state = &cmd->state.gfx;
+ assert(state->render_pass_descriptor);
+ kk_encoder_start_render(cmd, state->render_pass_descriptor,
+ state->render.view_mask);
+ kk_cmd_buffer_dirty_all_gfx(cmd);
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount,
+ const VkEvent *pEvents,
+ const VkDependencyInfo *pDependencyInfos)
+{
+   /* We do nothing; the event should already be set by the time we get
+    * here. */
+}
diff --git a/src/kosmickrisp/vulkan/kk_event.h b/src/kosmickrisp/vulkan/kk_event.h
new file mode 100644
index 00000000000..4d6b7a14dcc
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_event.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_EVENT_H
+#define KK_EVENT_H 1
+
+#include "kk_private.h"
+
+#include "vk_object.h"
+
+struct kk_bo;
+
+struct kk_event {
+ struct vk_object_base base;
+ struct kk_bo *bo;
+
+ uint64_t addr;
+ uint64_t *status;
+};
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(kk_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
+
+#endif /* KK_EVENT_H */
diff --git a/src/kosmickrisp/vulkan/kk_format.c b/src/kosmickrisp/vulkan/kk_format.c
new file mode 100644
index 00000000000..1a0347d5ac6
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_format.c
@@ -0,0 +1,359 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_format.h"
+
+#include "kk_buffer_view.h"
+#include "kk_entrypoints.h"
+#include "kk_image.h"
+#include "kk_physical_device.h"
+
+#include "kosmickrisp/bridge/mtl_format.h"
+
+#include "vk_enum_defines.h"
+#include "vk_format.h"
+
+#define MTL_FMT_ALL_NO_ATOMIC(width) \
+ .bit_widths = width, .filter = 1u, .write = 1u, .color = 1u, .blend = 1u, \
+ .msaa = 1u, .resolve = 1u, .sparse = 1u, .atomic = 0u
+
+// Filter, Write, Color, Blend, MSAA, Sparse
+#define MTL_FMT_FWCBMS(width) \
+ .bit_widths = width, .filter = 1u, .write = 1u, .color = 1u, .blend = 1u, \
+ .msaa = 1u, .resolve = 0u, .sparse = 1u, .atomic = 0u
+
+// Filter, Color, Blend, MSAA, Resolve, Sparse
+#define MTL_FMT_FCBMRS(width) \
+ .bit_widths = width, .filter = 1u, .write = 0u, .color = 1u, .blend = 1u, \
+ .msaa = 1u, .resolve = 1u, .sparse = 1u, .atomic = 0u
+
+// Filter, Write, Color, Blend, MSAA
+#define MTL_FMT_FWCBM(width) \
+ .bit_widths = width, .filter = 1u, .write = 1u, .color = 1u, .blend = 1u, \
+ .msaa = 1u, .resolve = 0u, .sparse = 0u, .atomic = 0u
+
+// Write, Color, Blend, MSAA, Sparse
+#define MTL_FMT_WCBMS(width) \
+ .bit_widths = width, .filter = 0u, .write = 1u, .color = 1u, .blend = 1u, \
+      .msaa = 1u, .resolve = 0u, .sparse = 1u, .atomic = 0u
+
+// Write, Color, MSAA, Sparse
+#define MTL_FMT_WCMS(width) \
+ .bit_widths = width, .filter = 0u, .write = 1u, .color = 1u, .blend = 0u, \
+ .msaa = 1u, .resolve = 0u, .sparse = 1u, .atomic = 0u
+
+// Write, Color, Sparse, Atomic
+#define MTL_FMT_WCSA(width) \
+ .bit_widths = width, .filter = 0u, .write = 1u, .color = 1u, .blend = 0u, \
+ .msaa = 0u, .resolve = 0u, .sparse = 1u, .atomic = 1u
+
+// Write, Color, Sparse
+#define MTL_FMT_WCS(width) \
+ .bit_widths = width, .filter = 0u, .write = 1u, .color = 1u, .blend = 0u, \
+ .msaa = 0u, .resolve = 0u, .sparse = 1u, .atomic = 0u
+
+// Filter, MSAA, Resolve
+#define MTL_FMT_FMR(width) \
+ .bit_widths = width, .filter = 1u, .write = 0u, .color = 0u, .blend = 0u, \
+ .msaa = 1u, .resolve = 1u, .sparse = 0u, .atomic = 0u
+
+// Filter, Sparse
+#define MTL_FMT_FS(width) \
+ .bit_widths = width, .filter = 1u, .write = 0u, .color = 0u, .blend = 0u, \
+ .msaa = 0u, .resolve = 0u, .sparse = 1u, .atomic = 0u
+
+// MSAA, Resolve
+#define MTL_FMT_MR(width) \
+ .bit_widths = width, .filter = 0u, .write = 0u, .color = 0u, .blend = 0u, \
+ .msaa = 1u, .resolve = 1u, .sparse = 0u, .atomic = 0u
+
+// MSAA
+#define MTL_FMT_M(width) \
+ .bit_widths = width, .filter = 0u, .write = 0u, .color = 0u, .blend = 0u, \
+ .msaa = 1u, .resolve = 0u, .sparse = 0u, .atomic = 0u
+
+#define MTL_FMT_TB_ALL \
+ .texel_buffer = { \
+ .write = 1u, \
+ .read = 1u, \
+ .read_write = 1u, \
+ }
+
+#define MTL_FMT_TB_WR \
+ .texel_buffer = { \
+ .write = 1u, \
+ .read = 1u, \
+ .read_write = 0u, \
+ }
+
+#define MTL_FMT_TB_R \
+ .texel_buffer = { \
+ .write = 0u, \
+ .read = 1u, \
+ .read_write = 0u, \
+ }
+
+#define MTL_FMT_TB_NONE \
+ .texel_buffer = { \
+ .write = 0u, \
+ .read = 0u, \
+ .read_write = 0u, \
+ }
+
+#define MTL_SWIZZLE_IDENTITY \
+ .swizzle = { \
+ .red = PIPE_SWIZZLE_X, \
+ .green = PIPE_SWIZZLE_Y, \
+ .blue = PIPE_SWIZZLE_Z, \
+ .alpha = PIPE_SWIZZLE_W, \
+ }
+
+#define MTL_SWIZZLE_ABGR \
+ .swizzle = { \
+ .red = PIPE_SWIZZLE_W, \
+ .green = PIPE_SWIZZLE_Z, \
+ .blue = PIPE_SWIZZLE_Y, \
+ .alpha = PIPE_SWIZZLE_X, \
+ }
+
+#define MTL_SWIZZLE_BGRA \
+ .swizzle = { \
+ .red = PIPE_SWIZZLE_Z, \
+ .green = PIPE_SWIZZLE_Y, \
+ .blue = PIPE_SWIZZLE_X, \
+ .alpha = PIPE_SWIZZLE_W, \
+ }
+
+#define MTL_FMT(pipe_format, mtl_format, swizzle, capabilities, \
+ texel_buffer_capabilities, native) \
+ [PIPE_FORMAT_## \
+ pipe_format] = {.mtl_pixel_format = MTL_PIXEL_FORMAT_##mtl_format, \
+ swizzle, \
+ capabilities, \
+ texel_buffer_capabilities, \
+ .is_native = native}
+
+#define MTL_FMT_NATIVE(format, capabilities, texel_buffer_capabilities) \
+ [PIPE_FORMAT_##format] = {.mtl_pixel_format = MTL_PIXEL_FORMAT_##format, \
+ MTL_SWIZZLE_IDENTITY, \
+ capabilities, \
+ texel_buffer_capabilities, \
+ .is_native = 1}
+
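+/* Table of supported formats, indexed by PIPE_FORMAT. The capability macros
+ * above mirror Metal's per-format capability categories (Filter, Write,
+ * Color, Blend, MSAA, Resolve, Sparse, Atomic) plus texel-buffer access. */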
+static const struct kk_va_format kk_vf_formats[] = {
+ // 8-bit formats
+ MTL_FMT_NATIVE(R8_UNORM, MTL_FMT_ALL_NO_ATOMIC(8), MTL_FMT_TB_ALL),
+ MTL_FMT_NATIVE(A8_UNORM, MTL_FMT_ALL_NO_ATOMIC(8), MTL_FMT_TB_ALL),
+ MTL_FMT_NATIVE(R8_SRGB, MTL_FMT_ALL_NO_ATOMIC(8), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(R8_SNORM, MTL_FMT_ALL_NO_ATOMIC(8), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R8_UINT, MTL_FMT_WCMS(8), MTL_FMT_TB_ALL),
+ MTL_FMT_NATIVE(R8_SINT, MTL_FMT_WCMS(8), MTL_FMT_TB_ALL),
+
+ // 16-bit formats
+ MTL_FMT_NATIVE(R16_UNORM, MTL_FMT_FWCBMS(16), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R16_SNORM, MTL_FMT_FWCBMS(16), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R16_UINT, MTL_FMT_WCMS(16), MTL_FMT_TB_ALL),
+ MTL_FMT_NATIVE(R16_SINT, MTL_FMT_WCMS(16), MTL_FMT_TB_ALL),
+ MTL_FMT_NATIVE(R16_FLOAT, MTL_FMT_ALL_NO_ATOMIC(16), MTL_FMT_TB_ALL),
+ MTL_FMT_NATIVE(R8G8_UNORM, MTL_FMT_ALL_NO_ATOMIC(16), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R8G8_SNORM, MTL_FMT_ALL_NO_ATOMIC(16), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R8G8_SRGB, MTL_FMT_ALL_NO_ATOMIC(16), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(R8G8_UINT, MTL_FMT_WCMS(16), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R8G8_SINT, MTL_FMT_WCMS(16), MTL_FMT_TB_WR),
+
+ // 32-bit formats
+ MTL_FMT_NATIVE(R32_UINT, MTL_FMT_WCSA(32), MTL_FMT_TB_ALL),
+ MTL_FMT_NATIVE(R32_SINT, MTL_FMT_WCSA(32), MTL_FMT_TB_ALL),
+ MTL_FMT_NATIVE(R32_FLOAT, MTL_FMT_WCBMS(32), MTL_FMT_TB_ALL),
+ MTL_FMT_NATIVE(R16G16_UNORM, MTL_FMT_FWCBMS(32), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R16G16_SNORM, MTL_FMT_FWCBMS(32), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R16G16_UINT, MTL_FMT_WCMS(32), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R16G16_SINT, MTL_FMT_WCMS(32), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R16G16_FLOAT, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R8G8B8A8_UNORM, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R8G8B8A8_SNORM, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R8G8B8A8_SRGB, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(R8G8B8A8_UINT, MTL_FMT_WCMS(32), MTL_FMT_TB_ALL),
+ MTL_FMT_NATIVE(R8G8B8A8_SINT, MTL_FMT_WCMS(32), MTL_FMT_TB_ALL),
+ MTL_FMT_NATIVE(B8G8R8A8_UNORM, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_R),
+ MTL_FMT_NATIVE(B8G8R8A8_SRGB, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_NONE),
+
+ // 64-bit formats
+ MTL_FMT_NATIVE(R32G32_UINT, MTL_FMT_WCMS(64), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R32G32_SINT, MTL_FMT_WCMS(64), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R32G32_FLOAT, MTL_FMT_WCBMS(64), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R16G16B16A16_UNORM, MTL_FMT_FWCBMS(64), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R16G16B16A16_SNORM, MTL_FMT_FWCBMS(64), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R16G16B16A16_UINT, MTL_FMT_WCMS(64), MTL_FMT_TB_ALL),
+ MTL_FMT_NATIVE(R16G16B16A16_SINT, MTL_FMT_WCMS(64), MTL_FMT_TB_ALL),
+ MTL_FMT_NATIVE(R16G16B16A16_FLOAT, MTL_FMT_ALL_NO_ATOMIC(64),
+ MTL_FMT_TB_ALL),
+
+ // 128-bit formats
+ MTL_FMT_NATIVE(R32G32B32A32_UINT, MTL_FMT_WCS(128), MTL_FMT_TB_ALL),
+ MTL_FMT_NATIVE(R32G32B32A32_SINT, MTL_FMT_WCS(128), MTL_FMT_TB_ALL),
+ MTL_FMT_NATIVE(R32G32B32A32_FLOAT, MTL_FMT_WCMS(128), MTL_FMT_TB_ALL),
+
+ // 16-bit packed formats
+ MTL_FMT_NATIVE(B5G6R5_UNORM, MTL_FMT_FCBMRS(16), MTL_FMT_TB_NONE),
+ /* Hardware has issues with border color opaque black, and since it's not
+ * required by Vulkan, we can just disable it.
+ */
+ /* MTL_FMT_NATIVE(A1B5G5R5_UNORM, MTL_FMT_FCBMRS(16), MTL_FMT_TB_NONE), */
+ MTL_FMT_NATIVE(A4B4G4R4_UNORM, MTL_FMT_FCBMRS(16), MTL_FMT_TB_NONE),
+ MTL_FMT(R4G4B4A4_UNORM, A4B4G4R4_UNORM, MTL_SWIZZLE_ABGR, MTL_FMT_FCBMRS(16),
+ MTL_FMT_TB_NONE, false),
+ MTL_FMT(A4R4G4B4_UNORM, A4B4G4R4_UNORM, MTL_SWIZZLE_BGRA, MTL_FMT_FCBMRS(16),
+ MTL_FMT_TB_NONE, false),
+ MTL_FMT_NATIVE(B5G5R5A1_UNORM, MTL_FMT_FCBMRS(16), MTL_FMT_TB_NONE),
+
+ // 32-bit packed formats
+ MTL_FMT_NATIVE(R10G10B10A2_UNORM, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(B10G10R10A2_UNORM, MTL_FMT_ALL_NO_ATOMIC(32),
+ MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(R10G10B10A2_UINT, MTL_FMT_WCMS(32), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R11G11B10_FLOAT, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_WR),
+ MTL_FMT_NATIVE(R9G9B9E5_FLOAT, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_NONE),
+
+ // ASTC formats
+ MTL_FMT_NATIVE(ASTC_4x4, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_5x4, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_5x5, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_6x5, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_6x6, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_8x5, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_8x6, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_8x8, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_10x5, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_10x6, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_10x8, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_10x10, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_12x10, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_12x12, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+
+ MTL_FMT_NATIVE(ASTC_4x4_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_5x4_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_5x5_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_6x5_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_6x6_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_8x5_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_8x6_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_8x8_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_10x5_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_10x6_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_10x8_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_10x10_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_12x10_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ASTC_12x12_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+
+ // EAC/ETC formats
+ MTL_FMT_NATIVE(ETC2_R11_UNORM, MTL_FMT_FS(64), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ETC2_R11_SNORM, MTL_FMT_FS(64), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ETC2_RG11_UNORM, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ETC2_RG11_SNORM, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ETC2_RGBA8, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ETC2_SRGBA8, MTL_FMT_FS(128), MTL_FMT_TB_NONE),
+
+ MTL_FMT_NATIVE(ETC2_RGB8, MTL_FMT_FS(64), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ETC2_SRGB8, MTL_FMT_FS(64), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ETC2_RGB8A1, MTL_FMT_FS(64), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(ETC2_SRGB8A1, MTL_FMT_FS(64), MTL_FMT_TB_NONE),
+
+ // Compressed PVRTC, HDR ASTC, BC TODO_KOSMICKRISP
+ // YUV formats TODO_KOSMICKRISP
+ // Extended range and wide color formats TODO_KOSMICKRISP
+
+ // Depth and stencil formats
+ MTL_FMT_NATIVE(Z16_UNORM, MTL_FMT_FMR(16), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(Z32_FLOAT, MTL_FMT_MR(32), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(S8_UINT, MTL_FMT_M(8), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(Z32_FLOAT_S8X24_UINT, MTL_FMT_MR(64), MTL_FMT_TB_NONE),
+ MTL_FMT_NATIVE(X32_S8X24_UINT, MTL_FMT_MR(64), MTL_FMT_TB_NONE),
+};
+
+#undef MTL_FMT_NATIVE
+#undef MTL_FMT
+
+#undef MTL_SWIZZLE_BGRA
+#undef MTL_SWIZZLE_ABGR
+#undef MTL_SWIZZLE_IDENTITY
+
+#undef MTL_FMT_ALL_NO_ATOMIC
+#undef MTL_FMT_FWCBMS
+#undef MTL_FMT_FCBMRS
+#undef MTL_FMT_FWCBM
+#undef MTL_FMT_WCBMS
+#undef MTL_FMT_WCMS
+#undef MTL_FMT_WCSA
+#undef MTL_FMT_WCS
+#undef MTL_FMT_FMR
+#undef MTL_FMT_FS
+#undef MTL_FMT_MR
+#undef MTL_FMT_M
+
+#undef MTL_FMT_TB_ALL
+#undef MTL_FMT_TB_WR
+#undef MTL_FMT_TB_R
+#undef MTL_FMT_TB_NONE
+
+const struct kk_va_format *
+kk_get_va_format(enum pipe_format format)
+{
+ if (format >= ARRAY_SIZE(kk_vf_formats))
+ return NULL;
+
+ if (kk_vf_formats[format].bit_widths == 0)
+ return NULL;
+
+ return &kk_vf_formats[format];
+}
+
+enum mtl_pixel_format
+vk_format_to_mtl_pixel_format(VkFormat vkformat)
+{
+ enum pipe_format format = vk_format_to_pipe_format(vkformat);
+ const struct kk_va_format *supported_format = kk_get_va_format(format);
+ assert(supported_format);
+ return supported_format->mtl_pixel_format;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice,
+ VkFormat format,
+ VkFormatProperties2 *pFormatProperties)
+{
+ VK_FROM_HANDLE(kk_physical_device, pdevice, physicalDevice);
+
+ VkFormatFeatureFlags2 linear2, optimal2, buffer2;
+ linear2 =
+ kk_get_image_format_features(pdevice, format, VK_IMAGE_TILING_LINEAR, 0);
+ optimal2 =
+ kk_get_image_format_features(pdevice, format, VK_IMAGE_TILING_OPTIMAL, 0);
+ buffer2 = kk_get_buffer_format_features(pdevice, format);
+
+ pFormatProperties->formatProperties = (VkFormatProperties){
+ .linearTilingFeatures = vk_format_features2_to_features(linear2),
+ .optimalTilingFeatures = vk_format_features2_to_features(optimal2),
+ .bufferFeatures = vk_format_features2_to_features(buffer2),
+ };
+
+ vk_foreach_struct(ext, pFormatProperties->pNext) {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3: {
+ VkFormatProperties3 *p = (void *)ext;
+ p->linearTilingFeatures = linear2;
+ p->optimalTilingFeatures = optimal2;
+ p->bufferFeatures = buffer2;
+ break;
+ }
+
+ default:
+ vk_debug_ignored_stype(ext->sType);
+ break;
+ }
+ }
+}
diff --git a/src/kosmickrisp/vulkan/kk_format.h b/src/kosmickrisp/vulkan/kk_format.h
new file mode 100644
index 00000000000..64541a659e5
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_format.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_FORMAT_H
+#define KK_FORMAT_H 1
+
+#include "kk_private.h"
+
+#include "util/format/u_format.h"
+
+struct kk_physical_device;
+enum pipe_format;
+enum mtl_pixel_format;
+
+struct kk_va_format {
+ /* Would love to use enum pipe_swizzle, but it's bigger than the required
+ * type for util_format_compose_swizzles... */
+ struct {
+ union {
+ struct {
+ uint8_t red;
+ uint8_t green;
+ uint8_t blue;
+ uint8_t alpha;
+ };
+ uint8_t channels[4];
+ };
+ } swizzle;
+ uint32_t mtl_pixel_format;
+ uint8_t bit_widths;
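+   /* Per-format capability bits (cf. the pixel format capability tables in
+    * Apple's Metal Feature Set Tables). */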
+ uint8_t filter : 1;
+ uint8_t write : 1;
+ uint8_t color : 1;
+ uint8_t blend : 1;
+ uint8_t msaa : 1;
+ uint8_t resolve : 1;
+ uint8_t sparse : 1;
+ uint8_t atomic : 1;
+ struct {
+ uint8_t write : 1;
+ uint8_t read : 1;
+ uint8_t read_write : 1;
+ } texel_buffer;
+ uint8_t is_native : 1;
+};
+
+const struct kk_va_format *kk_get_va_format(enum pipe_format format);
+
+enum mtl_pixel_format vk_format_to_mtl_pixel_format(enum VkFormat vkformat);
+
+#endif /* KK_FORMAT_H */
diff --git a/src/kosmickrisp/vulkan/kk_image.c b/src/kosmickrisp/vulkan/kk_image.c
new file mode 100644
index 00000000000..0b05fc82002
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_image.c
@@ -0,0 +1,967 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_image.h"
+
+#include "kk_device.h"
+#include "kk_device_memory.h"
+#include "kk_entrypoints.h"
+#include "kk_format.h"
+#include "kk_physical_device.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+
+#include "vk_enum_defines.h"
+#include "vk_enum_to_str.h"
+#include "vk_format.h"
+#include "wsi_common_private.h"
+
+static VkFormatFeatureFlags2
+kk_get_image_plane_format_features(struct kk_physical_device *pdev,
+ VkFormat vk_format, VkImageTiling tiling,
+ uint64_t drm_format_mod)
+{
+ VkFormatFeatureFlags2 features = 0;
+
+ /* Metal does not support linear tiling for compressed formats */
+ if (tiling == VK_IMAGE_TILING_LINEAR && vk_format_is_compressed(vk_format))
+ return 0;
+
+ enum pipe_format p_format = vk_format_to_pipe_format(vk_format);
+ if (p_format == PIPE_FORMAT_NONE)
+ return 0;
+
+ /* You can't tile a non-power-of-two */
+ if (!util_is_power_of_two_nonzero(util_format_get_blocksize(p_format)))
+ return 0;
+
+ const struct kk_va_format *va_format = kk_get_va_format(p_format);
+ if (va_format == NULL)
+ return 0;
+
+ // Textures can at least be sampled
+ features |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT;
+ features |= VK_FORMAT_FEATURE_2_BLIT_SRC_BIT;
+
+   if (va_format->filter) {
+      features |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+      /* TODO_KOSMICKRISP Understand if we want to expose this */
+      features |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_MINMAX_BIT;
+   }
+
+   if (vk_format_has_depth(vk_format)) {
+      features |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_DEPTH_COMPARISON_BIT;
+   }
+
+   /* We disable the A8 format due to issues with the blend lowering pass */
+ if (va_format->color && tiling != VK_IMAGE_TILING_LINEAR &&
+ vk_format != VK_FORMAT_A8_UNORM) {
+ features |= VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT;
+ features |= VK_FORMAT_FEATURE_2_BLIT_DST_BIT;
+ // TODO_KOSMICKRISP Support snorm formats once the following spec issue is
+ // resolved: https://gitlab.khronos.org/vulkan/vulkan/-/issues/4293
+ if (!vk_format_is_snorm(vk_format))
+ features |= VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BLEND_BIT;
+ }
+
+ if (vk_format_is_depth_or_stencil(vk_format)) {
+ if (tiling == VK_IMAGE_TILING_LINEAR)
+ return 0;
+
+ features |= VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT;
+ }
+
+ if (va_format->write) {
+ features |= VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT;
+ }
+
+ if (va_format->atomic)
+ features |= VK_FORMAT_FEATURE_2_STORAGE_IMAGE_ATOMIC_BIT;
+
+ if (features != 0) {
+ features |= VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT;
+ features |= VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT;
+ }
+
+ return features;
+}
+
+VkFormatFeatureFlags2
+kk_get_image_format_features(struct kk_physical_device *pdev,
+ VkFormat vk_format, VkImageTiling tiling,
+ uint64_t drm_format_mod)
+{
+ const struct vk_format_ycbcr_info *ycbcr_info =
+ vk_format_get_ycbcr_info(vk_format);
+ if (ycbcr_info == NULL) {
+ return kk_get_image_plane_format_features(pdev, vk_format, tiling,
+ drm_format_mod);
+ }
+
+ /* For multi-plane, we get the feature flags of each plane separately,
+ * then take their intersection as the overall format feature flags
+ */
+ VkFormatFeatureFlags2 features = ~0ull;
+ bool cosited_chroma = false;
+ for (uint8_t plane = 0; plane < ycbcr_info->n_planes; plane++) {
+ const struct vk_format_ycbcr_plane *plane_info =
+ &ycbcr_info->planes[plane];
+ features &= kk_get_image_plane_format_features(pdev, plane_info->format,
+ tiling, drm_format_mod);
+ if (plane_info->denominator_scales[0] > 1 ||
+ plane_info->denominator_scales[1] > 1)
+ cosited_chroma = true;
+ }
+ if (features == 0)
+ return 0;
+
+ /* Uh... We really should be able to sample from YCbCr */
+ assert(features & VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT);
+ assert(features & VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_LINEAR_BIT);
+
+ /* These aren't allowed for YCbCr formats */
+ features &=
+ ~(VK_FORMAT_FEATURE_2_BLIT_SRC_BIT | VK_FORMAT_FEATURE_2_BLIT_DST_BIT |
+ VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT |
+ VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BLEND_BIT |
+ VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
+
+ /* This is supported on all YCbCr formats */
+ features |=
+ VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_YCBCR_CONVERSION_LINEAR_FILTER_BIT;
+
+ if (ycbcr_info->n_planes > 1) {
+ /* DISJOINT_BIT implies that each plane has its own separate binding,
+ * while SEPARATE_RECONSTRUCTION_FILTER_BIT implies that luma and chroma
+ * each have their own, separate filters, so these two bits make sense
+ * for multi-planar formats only.
+ *
+ * For MIDPOINT_CHROMA_SAMPLES_BIT, NVIDIA HW on single-plane interleaved
+ * YCbCr defaults to COSITED_EVEN, which is inaccurate and fails tests.
+ * This can be fixed with a NIR tweak but for now, we only enable this bit
+ * for multi-plane formats. See Issue #9525 on the mesa/main tracker.
+ */
+ features |=
+ VK_FORMAT_FEATURE_DISJOINT_BIT |
+ VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_YCBCR_CONVERSION_SEPARATE_RECONSTRUCTION_FILTER_BIT |
+ VK_FORMAT_FEATURE_2_MIDPOINT_CHROMA_SAMPLES_BIT;
+ }
+
+ if (cosited_chroma)
+ features |= VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT;
+
+ return features;
+}
+
+static VkFormatFeatureFlags2
+vk_image_usage_to_format_features(VkImageUsageFlagBits usage_flag)
+{
+ assert(util_bitcount(usage_flag) == 1);
+ switch (usage_flag) {
+ case VK_IMAGE_USAGE_TRANSFER_SRC_BIT:
+ return VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT |
+ VK_FORMAT_FEATURE_BLIT_SRC_BIT;
+ case VK_IMAGE_USAGE_TRANSFER_DST_BIT:
+ return VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT |
+ VK_FORMAT_FEATURE_BLIT_DST_BIT;
+ case VK_IMAGE_USAGE_SAMPLED_BIT:
+ return VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT;
+ case VK_IMAGE_USAGE_STORAGE_BIT:
+ return VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT;
+ case VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT:
+ return VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT;
+ case VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT:
+ return VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT;
+ case VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT:
+ return VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT |
+ VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT;
+ case VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR:
+ return VK_FORMAT_FEATURE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
+ default:
+ return 0;
+ }
+}
+
+uint32_t
+kk_image_max_dimension(VkImageType image_type)
+{
+ /* Values taken from Apple7
+ * https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */
+ switch (image_type) {
+ case VK_IMAGE_TYPE_1D:
+ case VK_IMAGE_TYPE_2D:
+ return 16384;
+ case VK_IMAGE_TYPE_3D:
+ return 2048;
+ default:
+ UNREACHABLE("Invalid image type");
+ }
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_GetPhysicalDeviceImageFormatProperties2(
+ VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo,
+ VkImageFormatProperties2 *pImageFormatProperties)
+{
+ VK_FROM_HANDLE(kk_physical_device, pdev, physicalDevice);
+
+ const VkPhysicalDeviceExternalImageFormatInfo *external_info =
+ vk_find_struct_const(pImageFormatInfo->pNext,
+ PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO);
+
+ /* Initialize to zero in case we return VK_ERROR_FORMAT_NOT_SUPPORTED */
+ memset(&pImageFormatProperties->imageFormatProperties, 0,
+ sizeof(pImageFormatProperties->imageFormatProperties));
+
+   /* Metal only supports 2D depth/stencil textures (we promote 1D textures to
+    * 2D), so reject 3D. */
+ if (vk_format_is_depth_or_stencil(pImageFormatInfo->format) &&
+ pImageFormatInfo->type == VK_IMAGE_TYPE_3D)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ /* Metal does not support EAC/ETC formats for 3D textures. */
+ if (util_format_is_etc(vk_format_to_pipe_format(pImageFormatInfo->format)) &&
+ pImageFormatInfo->type == VK_IMAGE_TYPE_3D)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ /* Metal disallows reading compressed formats as uncompressed format.
+ * VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT is only used with
+ * compressed formats.
+ */
+ if (pImageFormatInfo->flags &
+ VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ const struct vk_format_ycbcr_info *ycbcr_info =
+ vk_format_get_ycbcr_info(pImageFormatInfo->format);
+
+ /* For the purposes of these checks, we don't care about all the extra
+ * YCbCr features and we just want the accumulation of features available
+ * to all planes of the given format.
+ */
+ VkFormatFeatureFlags2 features;
+ if (ycbcr_info == NULL) {
+ features = kk_get_image_plane_format_features(
+ pdev, pImageFormatInfo->format, pImageFormatInfo->tiling, 0u);
+ } else {
+ features = ~0ull;
+ assert(ycbcr_info->n_planes > 0);
+ for (uint8_t plane = 0; plane < ycbcr_info->n_planes; plane++) {
+ const VkFormat plane_format = ycbcr_info->planes[plane].format;
+ features &= kk_get_image_plane_format_features(
+ pdev, plane_format, pImageFormatInfo->tiling, 0u);
+ }
+ }
+
+ if (features == 0)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ if (pImageFormatInfo->tiling == VK_IMAGE_TILING_LINEAR &&
+ pImageFormatInfo->type == VK_IMAGE_TYPE_3D)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+   /* TODO_KOSMICKRISP We could allow linear images used as render targets as
+    * long as they are not used as input attachments. The main reason is that
+    * we expect array textures when rendering to and reading from input
+    * attachments, and Metal disallows arrays for linear textures.
+    */
+ if (pImageFormatInfo->tiling == VK_IMAGE_TILING_LINEAR &&
+ (pImageFormatInfo->usage &
+ (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ if (ycbcr_info && pImageFormatInfo->type != VK_IMAGE_TYPE_2D)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ /* Don't support sparse residency */
+ if ((pImageFormatInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ /* From the Vulkan 1.3.279 spec:
+ *
+ * VUID-VkImageCreateInfo-tiling-04121
+ *
+ * "If tiling is VK_IMAGE_TILING_LINEAR, flags must not contain
+ * VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT"
+ *
+ * VUID-VkImageCreateInfo-imageType-00970
+ *
+ * "If imageType is VK_IMAGE_TYPE_1D, flags must not contain
+ * VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT"
+ */
+ if ((pImageFormatInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) &&
+ (pImageFormatInfo->type == VK_IMAGE_TYPE_1D ||
+ pImageFormatInfo->tiling == VK_IMAGE_TILING_LINEAR))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ /* From the Vulkan 1.3.279 spec:
+ *
+ * VUID-VkImageCreateInfo-flags-09403
+ *
+ * "If flags contains VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT, flags
+ * must not include VK_IMAGE_CREATE_SPARSE_ALIASED_BIT,
+ * VK_IMAGE_CREATE_SPARSE_BINDING_BIT, or
+ * VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT"
+ */
+ if ((pImageFormatInfo->flags & VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT) &&
+ (pImageFormatInfo->flags & (VK_IMAGE_CREATE_SPARSE_ALIASED_BIT |
+ VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
+ VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT)))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ if (pImageFormatInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT &&
+ pImageFormatInfo->type != VK_IMAGE_TYPE_2D)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
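+   /* Dimension limits follow kk_image_max_dimension() (Apple7 feature set);
+    * Metal caps texture arrays at 2048 layers. */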
+ const uint32_t max_dim = kk_image_max_dimension(pImageFormatInfo->type);
+ assert(util_is_power_of_two_nonzero(max_dim));
+ uint32_t maxMipLevels = util_logbase2(max_dim) + 1;
+ VkExtent3D maxExtent;
+ uint32_t maxArraySize;
+ switch (pImageFormatInfo->type) {
+ case VK_IMAGE_TYPE_1D:
+ maxExtent = (VkExtent3D){max_dim, 1, 1};
+ maxArraySize = 2048u;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ maxExtent = (VkExtent3D){max_dim, max_dim, 1};
+ maxArraySize = 2048u;
+ break;
+ case VK_IMAGE_TYPE_3D:
+ maxExtent = (VkExtent3D){max_dim, max_dim, max_dim};
+ maxArraySize = 1u;
+ break;
+ default:
+ UNREACHABLE("Invalid image type");
+ }
+ if (pImageFormatInfo->tiling == VK_IMAGE_TILING_LINEAR)
+ maxArraySize = 1;
+
+ if (ycbcr_info != NULL || pImageFormatInfo->tiling == VK_IMAGE_TILING_LINEAR)
+ maxMipLevels = 1;
+
+ if (pImageFormatInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
+ maxArraySize = 1;
+ maxMipLevels = 1;
+ }
+
+ VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;
+ if (pImageFormatInfo->tiling == VK_IMAGE_TILING_OPTIMAL &&
+ pImageFormatInfo->type == VK_IMAGE_TYPE_2D && ycbcr_info == NULL &&
+ (features & (VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT |
+ VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
+ !(pImageFormatInfo->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)) {
+      /* TODO_KOSMICKRISP Modify sample count based on what pdev supports */
+      sampleCounts = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT |
+                     VK_SAMPLE_COUNT_4_BIT /* | VK_SAMPLE_COUNT_8_BIT */;
+ }
+
+ /* From the Vulkan 1.2.199 spec:
+ *
+ * "VK_IMAGE_CREATE_EXTENDED_USAGE_BIT specifies that the image can be
+ * created with usage flags that are not supported for the format the
+ * image is created with but are supported for at least one format a
+ * VkImageView created from the image can have."
+ *
+ * If VK_IMAGE_CREATE_EXTENDED_USAGE_BIT is set, views can be created with
+ * different usage than the image so we can't always filter on usage.
+ * There is one exception to this below for storage.
+ */
+ const VkImageUsageFlags image_usage = pImageFormatInfo->usage;
+ VkImageUsageFlags view_usage = image_usage;
+ if (pImageFormatInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT)
+ view_usage = 0;
+
+ u_foreach_bit(b, view_usage) {
+ VkFormatFeatureFlags2 usage_features =
+ vk_image_usage_to_format_features(1 << b);
+ if (usage_features && !(features & usage_features))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+ }
+
+ const VkExternalMemoryProperties *ext_mem_props = NULL;
+ if (external_info != NULL && external_info->handleType != 0) {
+ /* We only support heaps since that's the backing for all our memory and
+ * simplifies implementation */
+ switch (external_info->handleType) {
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLHEAP_BIT_EXT:
+ ext_mem_props = &kk_mtlheap_mem_props;
+ break;
+ default:
+ /* From the Vulkan 1.3.256 spec:
+ *
+ * "If handleType is not compatible with the [parameters] in
+ * VkPhysicalDeviceImageFormatInfo2, then
+ * vkGetPhysicalDeviceImageFormatProperties2 returns
+ * VK_ERROR_FORMAT_NOT_SUPPORTED."
+ */
+ return vk_errorf(pdev, VK_ERROR_FORMAT_NOT_SUPPORTED,
+ "unsupported VkExternalMemoryHandleTypeFlagBits: %s ",
+ vk_ExternalMemoryHandleTypeFlagBits_to_str(
+ external_info->handleType));
+ }
+ }
+
+ const unsigned plane_count =
+ vk_format_get_plane_count(pImageFormatInfo->format);
+
+ /* From the Vulkan 1.3.259 spec, VkImageCreateInfo:
+ *
+ * VUID-VkImageCreateInfo-imageCreateFormatFeatures-02260
+ *
+ * "If format is a multi-planar format, and if imageCreateFormatFeatures
+ * (as defined in Image Creation Limits) does not contain
+ * VK_FORMAT_FEATURE_DISJOINT_BIT, then flags must not contain
+ * VK_IMAGE_CREATE_DISJOINT_BIT"
+ *
+ * This is satisfied trivially because we support DISJOINT on all
+ * multi-plane formats. Also,
+ *
+ * VUID-VkImageCreateInfo-format-01577
+ *
+ * "If format is not a multi-planar format, and flags does not include
+ * VK_IMAGE_CREATE_ALIAS_BIT, flags must not contain
+ * VK_IMAGE_CREATE_DISJOINT_BIT"
+ */
+ if (plane_count == 1 &&
+ !(pImageFormatInfo->flags & VK_IMAGE_CREATE_ALIAS_BIT) &&
+ (pImageFormatInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ if (ycbcr_info &&
+ ((pImageFormatInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) ||
+ (pImageFormatInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT)))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ if ((pImageFormatInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) &&
+ (pImageFormatInfo->usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ pImageFormatProperties->imageFormatProperties = (VkImageFormatProperties){
+ .maxExtent = maxExtent,
+ .maxMipLevels = maxMipLevels,
+ .maxArrayLayers = maxArraySize,
+ .sampleCounts = sampleCounts,
+ .maxResourceSize = UINT32_MAX, /* TODO */
+ };
+
+ vk_foreach_struct(s, pImageFormatProperties->pNext) {
+ switch (s->sType) {
+ case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES: {
+ VkExternalImageFormatProperties *p = (void *)s;
+ /* From the Vulkan 1.3.256 spec:
+ *
+ * "If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2
+ * will behave as if VkPhysicalDeviceExternalImageFormatInfo was
+ * not present, and VkExternalImageFormatProperties will be
+ * ignored."
+ *
+ * This is true if and only if ext_mem_props == NULL
+ */
+ if (ext_mem_props != NULL)
+ p->externalMemoryProperties = *ext_mem_props;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES: {
+ VkSamplerYcbcrConversionImageFormatProperties *ycbcr_props = (void *)s;
+ ycbcr_props->combinedImageSamplerDescriptorCount = plane_count;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_HOST_IMAGE_COPY_DEVICE_PERFORMANCE_QUERY_EXT: {
+ VkHostImageCopyDevicePerformanceQueryEXT *host_props = (void *)s;
+ host_props->optimalDeviceAccess = true;
+ host_props->identicalMemoryLayout = true;
+ break;
+ }
+ default:
+ vk_debug_ignored_stype(s->sType);
+ break;
+ }
+ }
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetPhysicalDeviceSparseImageFormatProperties2(
+ VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo,
+ uint32_t *pPropertyCount, VkSparseImageFormatProperties2 *pProperties)
+{
+   *pPropertyCount = 0;
+}
+
+static VkResult
+kk_image_init(struct kk_device *dev, struct kk_image *image,
+ const VkImageCreateInfo *pCreateInfo)
+{
+ vk_image_init(&dev->vk, &image->vk, pCreateInfo);
+
+ if ((image->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
+ image->vk.samples > 1) {
+ image->vk.usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
+ image->vk.stencil_usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
+ }
+
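+   /* Transfer sources are read by sampling and transfer destinations are
+    * written by rendering, so derive the corresponding usage bits. */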
+ if (image->vk.usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT)
+ image->vk.usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
+ if (image->vk.usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
+ if (util_format_is_depth_or_stencil(
+ vk_format_to_pipe_format(image->vk.format))) {
+ image->vk.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
+ image->vk.stencil_usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
+ } else {
+ image->vk.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+ }
+ }
+
+ image->plane_count = vk_format_get_plane_count(pCreateInfo->format);
+ image->disjoint = image->plane_count > 1 &&
+ (pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT);
+
+ const struct vk_format_ycbcr_info *ycbcr_info =
+ vk_format_get_ycbcr_info(pCreateInfo->format);
+ for (uint8_t plane = 0; plane < image->plane_count; plane++) {
+ VkFormat format =
+ ycbcr_info ? ycbcr_info->planes[plane].format : pCreateInfo->format;
+ const uint8_t width_scale =
+ ycbcr_info ? ycbcr_info->planes[plane].denominator_scales[0] : 1;
+ const uint8_t height_scale =
+ ycbcr_info ? ycbcr_info->planes[plane].denominator_scales[1] : 1;
+ kk_image_layout_init(dev, pCreateInfo, vk_format_to_pipe_format(format),
+ width_scale, height_scale,
+ &image->planes[plane].layout);
+ }
+
+ if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+ kk_image_layout_init(dev, pCreateInfo, PIPE_FORMAT_R32_UINT, 1, 1,
+ &image->stencil_copy_temp.layout);
+ }
+
+ return VK_SUCCESS;
+}
+
+static void
+kk_image_plane_size_align_B(struct kk_device *dev, const struct kk_image *image,
+ const struct kk_image_plane *plane,
+ uint64_t *size_B_out, uint64_t *align_B_out)
+{
+ *size_B_out = plane->layout.size_B;
+ *align_B_out = plane->layout.align_B;
+}
+
+static void
+kk_image_plane_finish(struct kk_device *dev, struct kk_image_plane *plane,
+ VkImageCreateFlags create_flags,
+ const VkAllocationCallbacks *pAllocator)
+{
+ if (plane->mtl_handle != NULL)
+ mtl_release(plane->mtl_handle);
+ if (plane->mtl_handle_array != NULL)
+ mtl_release(plane->mtl_handle_array);
+}
+
+static void
+kk_image_finish(struct kk_device *dev, struct kk_image *image,
+ const VkAllocationCallbacks *pAllocator)
+{
+ for (uint8_t plane = 0; plane < image->plane_count; plane++) {
+ kk_image_plane_finish(dev, &image->planes[plane], image->vk.create_flags,
+ pAllocator);
+ }
+
+ if (image->stencil_copy_temp.layout.size_B > 0) {
+ kk_image_plane_finish(dev, &image->stencil_copy_temp,
+ image->vk.create_flags, pAllocator);
+ }
+
+ vk_image_finish(&image->vk);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_CreateImage(VkDevice _device, const VkImageCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkImage *pImage)
+{
+ VK_FROM_HANDLE(kk_device, dev, _device);
+ struct kk_physical_device *pdev = kk_device_physical(dev);
+ struct kk_image *image;
+ VkResult result;
+
+#ifdef KK_USE_WSI_PLATFORM
+   /* Ignore swapchain creation info on Android: we have no Android WSI
+    * implementation in Mesa, so we would be guaranteed to access the Android
+    * swapchain object incorrectly.
+    */
+ const VkImageSwapchainCreateInfoKHR *swapchain_info =
+ vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR);
+ if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) {
+ return wsi_common_create_swapchain_image(
+ &pdev->wsi_device, pCreateInfo, swapchain_info->swapchain, pImage);
+ }
+#endif
+
+ image = vk_zalloc2(&dev->vk.alloc, pAllocator, sizeof(*image), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!image)
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ result = kk_image_init(dev, image, pCreateInfo);
+ if (result != VK_SUCCESS) {
+ vk_free2(&dev->vk.alloc, pAllocator, image);
+ return result;
+ }
+
+ *pImage = kk_image_to_handle(image);
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_DestroyImage(VkDevice device, VkImage _image,
+ const VkAllocationCallbacks *pAllocator)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VK_FROM_HANDLE(kk_image, image, _image);
+
+ if (!image)
+ return;
+
+ kk_image_finish(dev, image, pAllocator);
+ vk_free2(&dev->vk.alloc, pAllocator, image);
+}
+
+static void
+kk_image_plane_add_req(struct kk_device *dev, const struct kk_image *image,
+ const struct kk_image_plane *plane, uint64_t *size_B,
+ uint32_t *align_B)
+{
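+   /* Planes are packed consecutively: round the running size up to this
+    * plane's alignment, then append its size. */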
+ assert(util_is_power_of_two_or_zero64(*align_B));
+ uint64_t plane_size_B, plane_align_B;
+ kk_image_plane_size_align_B(dev, image, plane, &plane_size_B,
+ &plane_align_B);
+
+ *align_B = MAX2(*align_B, plane_align_B);
+ *size_B = align64(*size_B, plane_align_B);
+ *size_B += plane_size_B;
+}
+
+static void
+kk_get_image_memory_requirements(struct kk_device *dev, struct kk_image *image,
+ VkImageAspectFlags aspects,
+ VkMemoryRequirements2 *pMemoryRequirements)
+{
+ struct kk_physical_device *pdev = kk_device_physical(dev);
+ uint32_t memory_types = (1 << pdev->mem_type_count) - 1;
+
+ /* Remove non host visible heaps from the types for host image copy in case
+ * of potential issues. This should be removed when we get ReBAR.
+ */
+ if (image->vk.usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT) {
+ for (uint32_t i = 0; i < pdev->mem_type_count; i++) {
+ if (!(pdev->mem_types[i].propertyFlags &
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
+ memory_types &= ~BITFIELD_BIT(i);
+ }
+ }
+
+ // TODO hope for the best?
+
+ uint64_t size_B = 0;
+ uint32_t align_B = 0;
+ if (image->disjoint) {
+ uint8_t plane = kk_image_memory_aspects_to_plane(image, aspects);
+ kk_image_plane_add_req(dev, image, &image->planes[plane], &size_B,
+ &align_B);
+ } else {
+ for (unsigned plane = 0; plane < image->plane_count; plane++) {
+ kk_image_plane_add_req(dev, image, &image->planes[plane], &size_B,
+ &align_B);
+ }
+ }
+
+ if (image->stencil_copy_temp.layout.size_B > 0) {
+ kk_image_plane_add_req(dev, image, &image->stencil_copy_temp, &size_B,
+ &align_B);
+ }
+
+ pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
+ pMemoryRequirements->memoryRequirements.alignment = align_B;
+ pMemoryRequirements->memoryRequirements.size = size_B;
+
+ vk_foreach_struct_const(ext, pMemoryRequirements->pNext) {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
+ VkMemoryDedicatedRequirements *dedicated = (void *)ext;
+ dedicated->prefersDedicatedAllocation =
+ image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;
+ dedicated->requiresDedicatedAllocation =
+ image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT;
+ break;
+ }
+ default:
+ vk_debug_ignored_stype(ext->sType);
+ break;
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetImageMemoryRequirements2(VkDevice device,
+ const VkImageMemoryRequirementsInfo2 *pInfo,
+ VkMemoryRequirements2 *pMemoryRequirements)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VK_FROM_HANDLE(kk_image, image, pInfo->image);
+
+ const VkImagePlaneMemoryRequirementsInfo *plane_info =
+ vk_find_struct_const(pInfo->pNext, IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO);
+ const VkImageAspectFlags aspects =
+ image->disjoint ? plane_info->planeAspect : image->vk.aspects;
+
+ kk_get_image_memory_requirements(dev, image, aspects, pMemoryRequirements);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetDeviceImageMemoryRequirements(VkDevice device,
+ const VkDeviceImageMemoryRequirements *pInfo,
+ VkMemoryRequirements2 *pMemoryRequirements)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ ASSERTED VkResult result;
+ struct kk_image image = {0};
+
+ result = kk_image_init(dev, &image, pInfo->pCreateInfo);
+ assert(result == VK_SUCCESS);
+
+ const VkImageAspectFlags aspects =
+ image.disjoint ? pInfo->planeAspect : image.vk.aspects;
+
+ kk_get_image_memory_requirements(dev, &image, aspects, pMemoryRequirements);
+
+ kk_image_finish(dev, &image, NULL);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetImageSparseMemoryRequirements2(
+ VkDevice device, const VkImageSparseMemoryRequirementsInfo2 *pInfo,
+ uint32_t *pSparseMemoryRequirementCount,
+ VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
+{
+ *pSparseMemoryRequirementCount = 0u;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetDeviceImageSparseMemoryRequirements(
+ VkDevice device, const VkDeviceImageMemoryRequirements *pInfo,
+ uint32_t *pSparseMemoryRequirementCount,
+ VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
+{
+ *pSparseMemoryRequirementCount = 0u;
+}
+
+static void
+kk_get_image_subresource_layout(struct kk_device *dev, struct kk_image *image,
+ const VkImageSubresource2KHR *pSubresource,
+ VkSubresourceLayout2KHR *pLayout)
+{
+ const VkImageSubresource *isr = &pSubresource->imageSubresource;
+
+ const uint8_t p = kk_image_memory_aspects_to_plane(image, isr->aspectMask);
+ const struct kk_image_plane *plane = &image->planes[p];
+
+ uint64_t offset_B = 0;
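+   /* Non-disjoint images pack all planes into one binding, so this plane's
+    * offset is the accumulated (aligned) size of the planes before it. */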
+ if (!image->disjoint) {
+ uint32_t align_B = 0;
+ for (unsigned i = 0; i < p; i++) {
+ kk_image_plane_add_req(dev, image, &image->planes[i], &offset_B,
+ &align_B);
+ }
+ }
+
+ pLayout->subresourceLayout = (VkSubresourceLayout){
+ .offset = offset_B,
+ .size = plane->layout.size_B,
+ .rowPitch = plane->layout.linear_stride_B,
+ .arrayPitch = plane->layout.layer_stride_B,
+ .depthPitch = 1u,
+ };
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetImageSubresourceLayout2KHR(VkDevice device, VkImage _image,
+ const VkImageSubresource2KHR *pSubresource,
+ VkSubresourceLayout2KHR *pLayout)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VK_FROM_HANDLE(kk_image, image, _image);
+
+ kk_get_image_subresource_layout(dev, image, pSubresource, pLayout);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetDeviceImageSubresourceLayoutKHR(
+ VkDevice device, const VkDeviceImageSubresourceInfoKHR *pInfo,
+ VkSubresourceLayout2KHR *pLayout)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ ASSERTED VkResult result;
+ struct kk_image image = {0};
+
+ result = kk_image_init(dev, &image, pInfo->pCreateInfo);
+ assert(result == VK_SUCCESS);
+
+ kk_get_image_subresource_layout(dev, &image, pInfo->pSubresource, pLayout);
+
+ kk_image_finish(dev, &image, NULL);
+}
+
+static VkResult
+kk_image_plane_bind(struct kk_device *dev, struct kk_image *image,
+ struct kk_image_plane *plane, struct kk_device_memory *mem,
+ uint64_t *offset_B)
+{
+ uint64_t plane_size_B, plane_align_B;
+ kk_image_plane_size_align_B(dev, image, plane, &plane_size_B,
+ &plane_align_B);
+ *offset_B = align64(*offset_B, plane_align_B);
+
+ /* Linear textures in Metal need to be allocated through a buffer... */
+ if (plane->layout.optimized_layout)
+ plane->mtl_handle = mtl_new_texture_with_descriptor(
+ mem->bo->mtl_handle, &plane->layout, *offset_B);
+ else
+ plane->mtl_handle = mtl_new_texture_with_descriptor_linear(
+ mem->bo->map, &plane->layout, *offset_B);
+ plane->addr = mem->bo->gpu + *offset_B;
+
+ /* Create auxiliary 2D array texture for 3D images so we can use 2D views of
+ * it */
+ if (plane->layout.type == MTL_TEXTURE_TYPE_3D &&
+ (image->vk.create_flags & VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT)) {
+ struct kk_image_layout array_layout = plane->layout;
+ array_layout.type = MTL_TEXTURE_TYPE_2D_ARRAY;
+ // TODO_KOSMICKRISP We need to make sure that this doesn't go over Metal's
+ // layer maximum which is 2048. Probably by limiting the dimensions and
+ // layers for 3D images
+ array_layout.layers = array_layout.layers * array_layout.depth_px;
+ array_layout.depth_px = 1u;
+ plane->mtl_handle_array = mtl_new_texture_with_descriptor(
+ mem->bo->mtl_handle, &array_layout, *offset_B);
+ }
+
+ *offset_B += plane_size_B;
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+kk_bind_image_memory(struct kk_device *dev, const VkBindImageMemoryInfo *info)
+{
+ VK_FROM_HANDLE(kk_device_memory, mem, info->memory);
+ VK_FROM_HANDLE(kk_image, image, info->image);
+ VkResult result;
+
+ /* Ignore this struct on Android, we cannot access swapchain structures
+ * there. */
+#ifdef KK_USE_WSI_PLATFORM
+ const VkBindImageMemorySwapchainInfoKHR *swapchain_info =
+ vk_find_struct_const(info->pNext, BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR);
+
+ if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) {
+ VK_FROM_HANDLE(wsi_swapchain, swapchain, swapchain_info->swapchain);
+ VkImage _wsi_image =
+ swapchain->get_wsi_image(swapchain, swapchain_info->imageIndex)->image;
+ VK_FROM_HANDLE(kk_image, wsi_img, _wsi_image);
+
+ assert(image->plane_count == 1);
+ assert(wsi_img->plane_count == 1);
+
+ struct kk_image_plane *plane = &image->planes[0];
+ struct kk_image_plane *swapchain_plane = &wsi_img->planes[0];
+
+ /* Copy swapchain plane data retaining relevant resources. */
+ plane->layout = swapchain_plane->layout;
+ plane->mtl_handle = mtl_retain(swapchain_plane->mtl_handle);
+ plane->mtl_handle_array =
+ swapchain_plane->mtl_handle_array
+ ? mtl_retain(swapchain_plane->mtl_handle_array)
+ : NULL;
+ plane->addr = swapchain_plane->addr;
+
+ return VK_SUCCESS;
+ }
+#endif
+
+ uint64_t offset_B = info->memoryOffset;
+ if (image->disjoint) {
+ const VkBindImagePlaneMemoryInfo *plane_info =
+ vk_find_struct_const(info->pNext, BIND_IMAGE_PLANE_MEMORY_INFO);
+ const uint8_t plane =
+ kk_image_memory_aspects_to_plane(image, plane_info->planeAspect);
+ result =
+ kk_image_plane_bind(dev, image, &image->planes[plane], mem, &offset_B);
+ if (result != VK_SUCCESS)
+ return result;
+ } else {
+ for (unsigned plane = 0; plane < image->plane_count; plane++) {
+ result = kk_image_plane_bind(dev, image, &image->planes[plane], mem,
+ &offset_B);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+ }
+
+ if (image->stencil_copy_temp.layout.size_B > 0) {
+ result = kk_image_plane_bind(dev, image, &image->stencil_copy_temp, mem,
+ &offset_B);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_BindImageMemory2(VkDevice device, uint32_t bindInfoCount,
+ const VkBindImageMemoryInfo *pBindInfos)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VkResult first_error_or_success = VK_SUCCESS;
+
+ for (uint32_t i = 0; i < bindInfoCount; ++i) {
+ VkResult result = kk_bind_image_memory(dev, &pBindInfos[i]);
+
+ const VkBindMemoryStatusKHR *status =
+ vk_find_struct_const(pBindInfos[i].pNext, BIND_MEMORY_STATUS_KHR);
+ if (status != NULL && status->pResult != NULL)
+         *status->pResult = result;
+
+ if (first_error_or_success == VK_SUCCESS)
+ first_error_or_success = result;
+ }
+
+ return first_error_or_success;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_GetImageOpaqueCaptureDescriptorDataEXT(
+ VkDevice _device, const VkImageCaptureDescriptorDataInfoEXT *pInfo,
+ void *pData)
+{
+ return VK_SUCCESS;
+}
diff --git a/src/kosmickrisp/vulkan/kk_image.h b/src/kosmickrisp/vulkan/kk_image.h
new file mode 100644
index 00000000000..7ef11db3133
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_image.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_IMAGE_H
+#define KK_IMAGE_H 1
+
+#include "kk_private.h"
+
+#include "kk_device_memory.h"
+#include "kk_image_layout.h"
+
+#include "kosmickrisp/bridge/mtl_types.h"
+
+#include "vk_image.h"
+
+/* Because small images can end up with an array_stride_B that is less than
+ * the sparse block size (in bytes), we have to set SINGLE_MIPTAIL_BIT when
+ * advertising sparse properties to the client. This means that we get one
+ * single memory range for the miptail of the image. For large images with
+ * mipTailStartLod > 0, we have to deal with the array stride ourselves.
+ *
+ * We do this by returning NVK_MIP_TAIL_START_OFFSET as the image's
+ * imageMipTailOffset. We can then detect anything with that address as
+ * being part of the miptail and re-map it accordingly. The Vulkan spec
+ * explicitly allows for this.
+ *
+ * From the Vulkan 1.3.279 spec:
+ *
+ * "When VK_SPARSE_MEMORY_BIND_METADATA_BIT is present, the resourceOffset
+ * must have been derived explicitly from the imageMipTailOffset in the
+ * sparse resource properties returned for the metadata aspect. By
+ * manipulating the value returned for imageMipTailOffset, the
+ * resourceOffset does not have to correlate directly to a device virtual
+ * address offset, and may instead be whatever value makes it easiest for
+ * the implementation to derive the correct device virtual address."
+ */
+#define NVK_MIP_TAIL_START_OFFSET 0x6d74000000000000UL
+
+struct kk_device_memory;
+struct kk_physical_device;
+struct kk_queue;
+
+VkFormatFeatureFlags2
+kk_get_image_format_features(struct kk_physical_device *pdevice,
+ VkFormat format, VkImageTiling tiling,
+ uint64_t drm_format_mod);
+
+uint32_t kk_image_max_dimension(VkImageType image_type);
+
+struct kk_image_plane {
+ struct kk_image_layout layout;
+ // TODO_KOSMICKRISP Only have one handle since we will only create 2D arrays
+ // anyway
+ /* Metal handle with original handle type */
+ mtl_texture *mtl_handle;
+ /* Metal handle with 2D array type for 3D images */
+ mtl_texture *mtl_handle_array;
+ uint64_t addr;
+};
+
+struct kk_image {
+ struct vk_image vk;
+
+   /** True if the planes are bound separately.
+    * This is set based on VK_IMAGE_CREATE_DISJOINT_BIT.
+    */
+ bool disjoint;
+
+ uint8_t plane_count;
+ struct kk_image_plane planes[3];
+
+   /* In order to support D32_SFLOAT_S8_UINT, a temp area is
+    * needed. The stencil plane can't be copied using the DMA engine in a
+    * single pass since it would need 8-component support. Instead we
+    * allocate a 32-bit temp (PIPE_FORMAT_R32_UINT) that gets copied into,
+    * then copied again down to the 8-bit result.
+    */
+ struct kk_image_plane stencil_copy_temp;
+};
+
+static inline mtl_resource *
+kk_image_to_mtl_resource(const struct kk_image *image, int plane)
+{
+ if (image != NULL) {
+ assert(plane < ARRAY_SIZE(image->planes));
+ return (mtl_resource *)image->planes[plane].mtl_handle;
+ }
+ return NULL;
+}
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(kk_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
+
+static inline uint64_t
+kk_image_plane_base_address(const struct kk_image_plane *plane)
+{
+ return plane->addr;
+}
+
+static inline uint64_t
+kk_image_base_address(const struct kk_image *image, uint8_t plane)
+{
+ return kk_image_plane_base_address(&image->planes[plane]);
+}
+
+static inline uint8_t
+kk_image_aspects_to_plane(ASSERTED const struct kk_image *image,
+ VkImageAspectFlags aspectMask)
+{
+ /* Memory planes are only allowed for memory operations */
+ assert(!(aspectMask & (VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT |
+ VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT |
+ VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT |
+ VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT)));
+
+ /* Verify that the aspects are actually in the image */
+ assert(!(aspectMask & ~image->vk.aspects));
+
+ /* Must only be one aspect unless it's depth/stencil */
+ assert(aspectMask ==
+ (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) ||
+ util_bitcount(aspectMask) == 1);
+
+ switch (aspectMask) {
+ case VK_IMAGE_ASPECT_PLANE_1_BIT:
+ return 1;
+ case VK_IMAGE_ASPECT_PLANE_2_BIT:
+ return 2;
+ default:
+ return 0;
+ }
+}
+
+static inline uint8_t
+kk_image_memory_aspects_to_plane(ASSERTED const struct kk_image *image,
+ VkImageAspectFlags aspectMask)
+{
+ if (aspectMask & (VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT |
+ VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT |
+ VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT |
+ VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT)) {
+ /* We don't support DRM format modifiers on anything but single-plane
+ * color at the moment.
+ */
+ assert(aspectMask == VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT);
+ return 0;
+ } else {
+ return kk_image_aspects_to_plane(image, aspectMask);
+ }
+}
+
+#endif
diff --git a/src/kosmickrisp/vulkan/kk_image_layout.c b/src/kosmickrisp/vulkan/kk_image_layout.c
new file mode 100644
index 00000000000..06d4ed8754a
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_image_layout.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_image_layout.h"
+
+#include "kk_device.h"
+#include "kk_format.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+#include "kosmickrisp/bridge/mtl_format.h"
+
+#include "util/format/u_format.h"
+
+static enum mtl_texture_type
+vk_image_create_info_to_mtl_texture_type(
+ const struct VkImageCreateInfo *create_info)
+{
+ uint32_t array_layers = create_info->arrayLayers;
+ uint32_t samples = create_info->samples;
+ switch (create_info->imageType) {
+ case VK_IMAGE_TYPE_1D:
+ case VK_IMAGE_TYPE_2D:
+ /* We require input attachments to be arrays */
+ if (array_layers > 1 ||
+ (create_info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
+ return samples > 1u ? MTL_TEXTURE_TYPE_2D_ARRAY_MULTISAMPLE
+ : MTL_TEXTURE_TYPE_2D_ARRAY;
+ return samples > 1u ? MTL_TEXTURE_TYPE_2D_MULTISAMPLE
+ : MTL_TEXTURE_TYPE_2D;
+ case VK_IMAGE_TYPE_3D:
+ return MTL_TEXTURE_TYPE_3D;
+ default:
+ UNREACHABLE("Invalid image type");
+ return MTL_TEXTURE_TYPE_1D; /* Just return a type we don't actually use */
+ }
+}
+
+static enum mtl_texture_usage
+vk_image_usage_flags_to_mtl_texture_usage(VkImageUsageFlags usage_flags,
+ VkImageCreateFlags create_flags,
+ bool supports_atomics)
+{
+ enum mtl_texture_usage usage = 0u;
+
+ const VkImageUsageFlags shader_write =
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
+ if (usage_flags & shader_write)
+ usage |= MTL_TEXTURE_USAGE_SHADER_WRITE;
+
+ const VkImageUsageFlags shader_read = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+ VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
+ if (usage_flags & shader_read)
+ usage |= MTL_TEXTURE_USAGE_SHADER_READ;
+
+ const VkImageUsageFlags render_attachment =
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+
+ if (usage_flags & render_attachment)
+ usage |= MTL_TEXTURE_USAGE_RENDER_TARGET;
+
+ if (create_flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT)
+ usage |= MTL_TEXTURE_USAGE_PIXEL_FORMAT_VIEW;
+
+ if (supports_atomics) {
+ usage |= MTL_TEXTURE_USAGE_SHADER_READ;
+ usage |= MTL_TEXTURE_USAGE_SHADER_WRITE;
+ usage |= MTL_TEXTURE_USAGE_SHADER_ATOMIC;
+ }
+
+ return usage;
+}
+
+void
+kk_image_layout_init(const struct kk_device *dev,
+ const struct VkImageCreateInfo *create_info,
+ enum pipe_format format, const uint8_t width_scale,
+ const uint8_t height_scale, struct kk_image_layout *layout)
+{
+ const struct kk_va_format *supported_format = kk_get_va_format(format);
+ layout->type = vk_image_create_info_to_mtl_texture_type(create_info);
+ layout->width_px = create_info->extent.width / width_scale;
+ layout->height_px = create_info->extent.height / height_scale;
+ layout->depth_px = create_info->extent.depth;
+ layout->layers = create_info->arrayLayers;
+ layout->levels = create_info->mipLevels;
+ layout->optimized_layout = create_info->tiling == VK_IMAGE_TILING_OPTIMAL;
+ layout->usage = vk_image_usage_flags_to_mtl_texture_usage(
+ create_info->usage, create_info->flags, supported_format->atomic);
+ layout->format.pipe = format;
+ layout->format.mtl = supported_format->mtl_pixel_format;
+ layout->swizzle.red = supported_format->swizzle.red;
+ layout->swizzle.green = supported_format->swizzle.green;
+ layout->swizzle.blue = supported_format->swizzle.blue;
+ layout->swizzle.alpha = supported_format->swizzle.alpha;
+ layout->sample_count_sa = create_info->samples;
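+   /* Ask Metal for the allocation size and alignment of this texture
+    * descriptor (size_B / align_B). */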
+ mtl_heap_texture_size_and_align_with_descriptor(dev->mtl_handle, layout);
+
+   /*
+    * Metal requires MTL_TEXTURE_USAGE_PIXEL_FORMAT_VIEW if we are going to
+    * reinterpret the texture with a different pixel format. This seems to be
+    * the only format with this issue.
+    */
+ if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
+ layout->usage |= MTL_TEXTURE_USAGE_PIXEL_FORMAT_VIEW;
+ }
+
+ // TODO_KOSMICKRISP Fill remaining offsets and strides whenever possible
+ if (create_info->tiling == VK_IMAGE_TILING_LINEAR) {
+ const struct util_format_description *format_desc =
+ util_format_description(layout->format.pipe);
+ size_t bytes_per_texel = format_desc->block.bits / 8;
+ layout->linear_stride_B =
+ align(bytes_per_texel * layout->width_px, layout->align_B);
+ layout->layer_stride_B = layout->linear_stride_B * layout->height_px;
+ /* Metal only allows for 2D texture with no mipmapping. */
+ layout->size_B = layout->layer_stride_B;
+ }
+}
diff --git a/src/kosmickrisp/vulkan/kk_image_layout.h b/src/kosmickrisp/vulkan/kk_image_layout.h
new file mode 100644
index 00000000000..70b1c2bfe68
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_image_layout.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_IMAGE_LAYOUT_H
+#define KK_IMAGE_LAYOUT_H 1
+
+#include "kosmickrisp/bridge/mtl_types.h"
+
+#include "util/format/u_formats.h"
+
+#include "vulkan/vulkan.h"
+
+#define KK_MAX_MIP_LEVELS 16
+
+struct kk_device;
+struct VkImageCreateInfo;
+enum pipe_swizzle;
+
+struct kk_image_layout {
+ /** Width, height, and depth in pixels at level 0 */
+ uint32_t width_px, height_px, depth_px, layers;
+
+ enum mtl_texture_type type;
+
+ /** Number of samples per pixel. 1 if multisampling is disabled. */
+ uint8_t sample_count_sa;
+
+ /** Number of miplevels. 1 if no mipmapping is used. */
+ uint8_t levels;
+
+ uint8_t optimized_layout;
+
+ enum mtl_texture_usage usage;
+
+ /** Texture format */
+ struct {
+ enum pipe_format pipe;
+ uint32_t mtl;
+ } format;
+
+ /* Required to correctly set image swizzle for non-native formats */
+ /* Would love to use enum pipe_swizzle, but it's bigger than the required
+ * type for util_format_compose_swizzles... */
+ struct {
+ uint8_t red;
+ uint8_t green;
+ uint8_t blue;
+ uint8_t alpha;
+ } swizzle;
+
+ /**
+ * If tiling is LINEAR, the number of bytes between adjacent rows of
+ * elements. Otherwise, this field is zero.
+ */
+ uint32_t linear_stride_B;
+
+ /**
+ * Stride between layers of an array texture, including a cube map. Layer i
+ * begins at offset (i * layer_stride_B) from the beginning of the texture.
+ *
+ * If depth_px = 1, the value of this field is UNDEFINED.
+ */
+ uint64_t layer_stride_B;
+
+ /**
+ * Offsets of mip levels within a layer.
+ */
+ uint64_t level_offsets_B[KK_MAX_MIP_LEVELS];
+
+ /**
+    * If the layout is optimized (tiled), the stride in elements used for each
+    * mip level within a layer. This is necessary because compressed pixel
+    * formats may add extra stride padding.
+ */
+ uint32_t stride_el[KK_MAX_MIP_LEVELS];
+
+ /* Size of entire texture */
+ uint64_t size_B;
+
+ /* Alignment required */
+ uint64_t align_B;
+};
+
+struct kk_view_layout {
+ /** Type */
+ VkImageViewType view_type;
+
+ /** Number of samples per pixel. 1 if multisampling is disabled.
+ * Required to be able to correctly set the MTLTextureType.
+ */
+ uint8_t sample_count_sa;
+
+ /** Texture format */
+ struct {
+ enum pipe_format pipe;
+ uint32_t mtl;
+ } format;
+
+ /** Array base level. 0 if no array is used. */
+ uint16_t base_array_layer;
+
+ /** Array length. 1 if no array is used. */
+ uint16_t array_len;
+
+ /** Swizzle */
+ /* Would love to use enum pipe_swizzle, but it's bigger than the required
+ * type for util_format_compose_swizzles... */
+ struct {
+ union {
+ struct {
+ uint8_t red;
+ uint8_t green;
+ uint8_t blue;
+ uint8_t alpha;
+ };
+ uint8_t channels[4];
+ };
+ } swizzle;
+
+ /** Mipmap base level. 0 if no mipmapping is used. */
+ uint8_t base_level;
+
+ /** Number of miplevels. 1 if no mipmapping is used. */
+ uint8_t num_levels;
+
+ uint16_t min_lod_clamp;
+};
+
+void kk_image_layout_init(const struct kk_device *dev,
+ const struct VkImageCreateInfo *create_info,
+ enum pipe_format format, const uint8_t width_scale,
+ const uint8_t height_scale,
+ struct kk_image_layout *layout);
+
+#endif /* KK_IMAGE_LAYOUT_H */
diff --git a/src/kosmickrisp/vulkan/kk_image_view.c b/src/kosmickrisp/vulkan/kk_image_view.c
new file mode 100644
index 00000000000..ed8bd485972
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_image_view.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_image_view.h"
+
+#include "kk_device.h"
+#include "kk_entrypoints.h"
+#include "kk_format.h"
+#include "kk_image.h"
+#include "kk_physical_device.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+#include "kosmickrisp/bridge/mtl_format.h"
+
+#include "vk_format.h"
+
+static enum pipe_swizzle
+vk_swizzle_to_pipe(VkComponentSwizzle swizzle)
+{
+ switch (swizzle) {
+ case VK_COMPONENT_SWIZZLE_R:
+ return PIPE_SWIZZLE_X;
+ case VK_COMPONENT_SWIZZLE_G:
+ return PIPE_SWIZZLE_Y;
+ case VK_COMPONENT_SWIZZLE_B:
+ return PIPE_SWIZZLE_Z;
+ case VK_COMPONENT_SWIZZLE_A:
+ return PIPE_SWIZZLE_W;
+ case VK_COMPONENT_SWIZZLE_ONE:
+ return PIPE_SWIZZLE_1;
+ case VK_COMPONENT_SWIZZLE_ZERO:
+ return PIPE_SWIZZLE_0;
+ default:
+ UNREACHABLE("Invalid component swizzle");
+ }
+}
+
+static enum VkImageViewType
+remove_1d_view_types(enum VkImageViewType type)
+{
+ if (type == VK_IMAGE_VIEW_TYPE_1D)
+ return VK_IMAGE_VIEW_TYPE_2D;
+ if (type == VK_IMAGE_VIEW_TYPE_1D_ARRAY)
+ return VK_IMAGE_VIEW_TYPE_2D_ARRAY;
+ return type;
+}
+
+VkResult
+kk_image_view_init(struct kk_device *dev, struct kk_image_view *view,
+ const VkImageViewCreateInfo *pCreateInfo)
+{
+ VK_FROM_HANDLE(kk_image, image, pCreateInfo->image);
+
+ memset(view, 0, sizeof(*view));
+
+ vk_image_view_init(&dev->vk, &view->vk, pCreateInfo);
+
+   /* First, figure out which image planes we need.
+    * For depth/stencil, we only have one plane, so simply assert
+    * and then map directly between the image and view plane.
+    */
+ if (image->vk.aspects &
+ (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+ assert(image->plane_count == 1);
+ assert(kk_image_aspects_to_plane(image, view->vk.aspects) == 0);
+ view->plane_count = 1;
+ view->planes[0].image_plane = 0;
+ } else {
+ /* For other formats, retrieve the plane count from the aspect mask
+ * and then walk through the aspect mask to map each image plane
+ * to its corresponding view plane
+ */
+ assert(util_bitcount(view->vk.aspects) ==
+ vk_format_get_plane_count(view->vk.format));
+ view->plane_count = 0;
+ u_foreach_bit(aspect_bit, view->vk.aspects) {
+ uint8_t image_plane =
+ kk_image_aspects_to_plane(image, 1u << aspect_bit);
+ view->planes[view->plane_count++].image_plane = image_plane;
+ }
+ }
+ /* Finally, fill in each view plane separately */
+ for (unsigned view_plane = 0; view_plane < view->plane_count; view_plane++) {
+ const uint8_t image_plane = view->planes[view_plane].image_plane;
+ struct kk_image_plane *plane = &image->planes[image_plane];
+
+ const struct vk_format_ycbcr_info *ycbcr_info =
+ vk_format_get_ycbcr_info(view->vk.format);
+ assert(ycbcr_info || view_plane == 0);
+ VkFormat plane_format =
+ ycbcr_info ? ycbcr_info->planes[view_plane].format : view->vk.format;
+ enum pipe_format p_format = vk_format_to_pipe_format(plane_format);
+ if (view->vk.aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+ p_format = vk_format_to_pipe_format(image->vk.format);
+ else if (view->vk.aspects == VK_IMAGE_ASPECT_STENCIL_BIT)
+ p_format = util_format_stencil_only(
+ vk_format_to_pipe_format(image->vk.format));
+
+ view->planes[view_plane].format = p_format;
+ const struct kk_va_format *supported_format = kk_get_va_format(p_format);
+ assert(supported_format);
+
+ struct kk_view_layout view_layout = {
+ .view_type = remove_1d_view_types(view->vk.view_type),
+ .sample_count_sa = plane->layout.sample_count_sa,
+ .format = {.pipe = p_format,
+ .mtl = supported_format->mtl_pixel_format},
+ .base_level = view->vk.base_mip_level,
+ .num_levels = view->vk.level_count,
+ .base_array_layer = view->vk.base_array_layer,
+ .array_len = view->vk.layer_count,
+ .min_lod_clamp = view->vk.min_lod,
+ };
+ uint8_t view_swizzle[4] = {vk_swizzle_to_pipe(view->vk.swizzle.r),
+ vk_swizzle_to_pipe(view->vk.swizzle.g),
+ vk_swizzle_to_pipe(view->vk.swizzle.b),
+ vk_swizzle_to_pipe(view->vk.swizzle.a)};
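+      /* Compose the format's swizzle with the view's component mapping so the
+       * Metal view returns channels in the order Vulkan expects. */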
+ util_format_compose_swizzles(supported_format->swizzle.channels,
+ view_swizzle, view_layout.swizzle.channels);
+
+ /* When sampling a depth/stencil texture Metal returns (d, d, d, 1), but
+ * Vulkan requires (d, 0, 0, 1). This means, we need to convert G and B to
+ * 0 */
+ if (util_format_is_depth_or_stencil(p_format)) {
+ if (view_layout.swizzle.red == PIPE_SWIZZLE_Y ||
+ view_layout.swizzle.red == PIPE_SWIZZLE_Z)
+ view_layout.swizzle.red = PIPE_SWIZZLE_0;
+ if (view_layout.swizzle.green == PIPE_SWIZZLE_Y ||
+ view_layout.swizzle.green == PIPE_SWIZZLE_Z)
+ view_layout.swizzle.green = PIPE_SWIZZLE_0;
+ if (view_layout.swizzle.blue == PIPE_SWIZZLE_Y ||
+ view_layout.swizzle.blue == PIPE_SWIZZLE_Z)
+ view_layout.swizzle.blue = PIPE_SWIZZLE_0;
+ if (view_layout.swizzle.alpha == PIPE_SWIZZLE_Y ||
+ view_layout.swizzle.alpha == PIPE_SWIZZLE_Z)
+ view_layout.swizzle.alpha = PIPE_SWIZZLE_0;
+ }
+
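+      /* 3D images viewed with a non-3D view type use the separately created
+       * 2D-array handle of the image. */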
+ mtl_texture *mtl_handle = image->planes[image_plane].mtl_handle;
+ if (image->vk.image_type == VK_IMAGE_TYPE_3D &&
+ view->vk.view_type != VK_IMAGE_VIEW_TYPE_3D)
+ mtl_handle = image->planes[image_plane].mtl_handle_array;
+
+ if (view->vk.usage &
+ (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
+ view->planes[view_plane].mtl_handle_sampled =
+ mtl_new_texture_view_with(mtl_handle, &view_layout);
+ view->planes[view_plane].sampled_gpu_resource_id =
+ mtl_texture_get_gpu_resource_id(
+ view->planes[view_plane].mtl_handle_sampled);
+ }
+
+ if (view->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) {
+ /* For storage images, we can't have any cubes */
+ if (view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE ||
+ view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
+ view_layout.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY;
+
+ view->planes[view_plane].mtl_handle_storage =
+ mtl_new_texture_view_with(mtl_handle, &view_layout);
+ view->planes[view_plane].storage_gpu_resource_id =
+ mtl_texture_get_gpu_resource_id(
+ view->planes[view_plane].mtl_handle_storage);
+ }
+
+ if (view->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
+ bool requires_type_change =
+ view_layout.view_type != VK_IMAGE_VIEW_TYPE_3D &&
+ view_layout.view_type != VK_IMAGE_VIEW_TYPE_2D_ARRAY;
+ bool requires_format_change = view->vk.format != image->vk.format;
+ VkImageViewType original_type = view_layout.view_type;
+
+      /* Required so sampling from input attachments actually returns
+       * (d, 0, 0, 1) for d/s attachments; render targets cannot have a
+       * swizzle according to Metal...
+       */
+ if (requires_type_change || requires_format_change) {
+ view_layout.view_type = requires_type_change
+ ? VK_IMAGE_VIEW_TYPE_2D_ARRAY
+ : original_type;
+ view->planes[view_plane].mtl_handle_input =
+ mtl_new_texture_view_with(mtl_handle, &view_layout);
+ } else
+ view->planes[view_plane].mtl_handle_input = mtl_retain(mtl_handle);
+ view->planes[view_plane].input_gpu_resource_id =
+ mtl_texture_get_gpu_resource_id(
+ view->planes[view_plane].mtl_handle_input);
+
+ /* Handle mutable formats */
+ if (requires_format_change) {
+ view_layout.view_type = original_type;
+ view_layout.base_array_layer = 0u;
+ view_layout.base_level = 0u;
+ view_layout.array_len = image->vk.array_layers;
+ view_layout.num_levels = image->vk.mip_levels;
+ view->planes[view_plane].mtl_handle_render =
+ mtl_new_texture_view_with_no_swizzle(mtl_handle, &view_layout);
+ } else
+ view->planes[view_plane].mtl_handle_render = mtl_retain(mtl_handle);
+ }
+ }
+
+ return VK_SUCCESS;
+}
+
+void
+kk_image_view_finish(struct kk_device *dev, struct kk_image_view *view)
+{
+ for (uint8_t plane = 0; plane < view->plane_count; plane++) {
+ if (view->planes[plane].mtl_handle_sampled)
+ mtl_release(view->planes[plane].mtl_handle_sampled);
+
+ if (view->planes[plane].mtl_handle_storage)
+ mtl_release(view->planes[plane].mtl_handle_storage);
+
+ if (view->planes[plane].mtl_handle_input)
+ mtl_release(view->planes[plane].mtl_handle_input);
+
+ if (view->planes[plane].mtl_handle_render)
+ mtl_release(view->planes[plane].mtl_handle_render);
+ }
+
+ vk_image_view_finish(&view->vk);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkImageView *pView)
+{
+ VK_FROM_HANDLE(kk_device, dev, _device);
+ struct kk_image_view *view;
+ VkResult result;
+
+ view = vk_alloc2(&dev->vk.alloc, pAllocator, sizeof(*view), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!view)
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ result = kk_image_view_init(dev, view, pCreateInfo);
+ if (result != VK_SUCCESS) {
+ vk_free2(&dev->vk.alloc, pAllocator, view);
+ return result;
+ }
+
+ *pView = kk_image_view_to_handle(view);
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_DestroyImageView(VkDevice _device, VkImageView imageView,
+ const VkAllocationCallbacks *pAllocator)
+{
+ VK_FROM_HANDLE(kk_device, dev, _device);
+ VK_FROM_HANDLE(kk_image_view, view, imageView);
+
+ if (!view)
+ return;
+
+ kk_image_view_finish(dev, view);
+ vk_free2(&dev->vk.alloc, pAllocator, view);
+}
diff --git a/src/kosmickrisp/vulkan/kk_image_view.h b/src/kosmickrisp/vulkan/kk_image_view.h
new file mode 100644
index 00000000000..1a18c5d6f66
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_image_view.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_IMAGE_VIEW_H
+#define KK_IMAGE_VIEW_H 1
+
+#include "kk_private.h"
+
+#include "kosmickrisp/bridge/mtl_types.h"
+
+#include "util/format/u_formats.h"
+
+#include "vk_image.h"
+
+struct kk_device;
+
+struct kk_image_view {
+ struct vk_image_view vk;
+
+ uint8_t plane_count;
+ struct {
+ uint8_t image_plane;
+
+ enum pipe_format format;
+
+ mtl_texture *mtl_handle_sampled;
+      /* TODO_KOSMICKRISP We can probably get rid of this once we lower 2D
+       * cubes and 3D to 2D array? */
+      mtl_texture *mtl_handle_storage;
+
+ /* Cached handle so we don't have to retrieve it from the image when we
+ * render */
+ mtl_texture *mtl_handle_render;
+
+      /* Input attachment handle. Required since input attachments need to be
+       * arrays, and the sampled view may not be */
+ mtl_texture *mtl_handle_input;
+
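+      /* GPU resource IDs (MTLResourceID) of the views above, written into
+       * descriptors for bindless access. */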
+ uint64_t sampled_gpu_resource_id;
+ uint64_t storage_gpu_resource_id;
+ uint64_t input_gpu_resource_id;
+ } planes[3];
+};
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(kk_image_view, vk.base, VkImageView,
+ VK_OBJECT_TYPE_IMAGE_VIEW)
+
+VkResult kk_image_view_init(struct kk_device *dev, struct kk_image_view *view,
+ const VkImageViewCreateInfo *pCreateInfo);
+
+void kk_image_view_finish(struct kk_device *dev, struct kk_image_view *view);
+
+#endif /* KK_IMAGE_VIEW_H */
diff --git a/src/kosmickrisp/vulkan/kk_instance.c b/src/kosmickrisp/vulkan/kk_instance.c
new file mode 100644
index 00000000000..a51f2fd958b
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_instance.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_instance.h"
+
+#include "kk_debug.h"
+#include "kk_entrypoints.h"
+#include "kk_physical_device.h"
+
+#include "kk_dispatch_trampolines.h"
+
+#include "vulkan/wsi/wsi_common.h"
+
+#include "util/build_id.h"
+#include "util/driconf.h"
+#include "util/mesa-sha1.h"
+#include "util/u_debug.h"
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_EnumerateInstanceVersion(uint32_t *pApiVersion)
+{
+ uint32_t version_override = vk_get_version_override();
+ *pApiVersion = version_override ? version_override
+ : VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION);
+
+ return VK_SUCCESS;
+}
+
+static const struct vk_instance_extension_table instance_extensions = {
+#ifdef KK_USE_WSI_PLATFORM
+ .KHR_get_surface_capabilities2 = true,
+ .KHR_surface = true,
+ .KHR_surface_protected_capabilities = true,
+ .EXT_surface_maintenance1 = true,
+ .EXT_swapchain_colorspace = true,
+#endif
+#ifdef VK_USE_PLATFORM_WAYLAND_KHR
+ .KHR_wayland_surface = true,
+#endif
+#ifdef VK_USE_PLATFORM_XCB_KHR
+ .KHR_xcb_surface = true,
+#endif
+#ifdef VK_USE_PLATFORM_XLIB_KHR
+ .KHR_xlib_surface = true,
+#endif
+#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
+ .EXT_acquire_xlib_display = true,
+#endif
+#ifdef VK_USE_PLATFORM_DISPLAY_KHR
+ .KHR_display = true,
+ .KHR_get_display_properties2 = true,
+ .EXT_direct_mode_display = true,
+ .EXT_display_surface_counter = true,
+ .EXT_acquire_drm_display = true,
+#endif
+#ifdef VK_USE_PLATFORM_METAL_EXT
+ .EXT_metal_surface = true,
+#endif
+#ifndef VK_USE_PLATFORM_METAL_EXT
+ .EXT_headless_surface = true,
+#endif
+ .KHR_device_group_creation = true,
+ .KHR_external_fence_capabilities = true,
+ .KHR_external_memory_capabilities = true,
+ .KHR_external_semaphore_capabilities = true,
+ .KHR_get_physical_device_properties2 = true,
+ .EXT_debug_report = true,
+ .EXT_debug_utils = true,
+};
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_EnumerateInstanceExtensionProperties(const char *pLayerName,
+ uint32_t *pPropertyCount,
+ VkExtensionProperties *pProperties)
+{
+ if (pLayerName)
+ return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
+
+ return vk_enumerate_instance_extension_properties(
+ &instance_extensions, pPropertyCount, pProperties);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkInstance *pInstance)
+{
+ struct kk_instance *instance;
+ VkResult result;
+
+ if (pAllocator == NULL)
+ pAllocator = vk_default_allocator();
+
+ instance = vk_alloc(pAllocator, sizeof(*instance), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+ if (!instance)
+ return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ struct vk_instance_dispatch_table dispatch_table;
+ vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
+ &kk_instance_entrypoints, true);
+ vk_instance_dispatch_table_from_entrypoints(
+ &dispatch_table, &wsi_instance_entrypoints, false);
+
+ result = vk_instance_init(&instance->vk, &instance_extensions,
+ &dispatch_table, pCreateInfo, pAllocator);
+ if (result != VK_SUCCESS)
+ goto fail_alloc;
+
+ instance->vk.physical_devices.enumerate = kk_enumerate_physical_devices;
+ instance->vk.physical_devices.destroy = kk_physical_device_destroy;
+
+ /* TODO_KOSMICKRISP We need to fill instance->driver_build_sha */
+
+ kk_process_debug_variable();
+
+ *pInstance = kk_instance_to_handle(instance);
+ return VK_SUCCESS;
+
+// fail_init:
+// vk_instance_finish(&instance->vk);
+fail_alloc:
+ vk_free(pAllocator, instance);
+
+ return result;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_DestroyInstance(VkInstance _instance,
+ const VkAllocationCallbacks *pAllocator)
+{
+ VK_FROM_HANDLE(kk_instance, instance, _instance);
+
+ if (!instance)
+ return;
+
+ vk_instance_finish(&instance->vk);
+ vk_free(&instance->vk.alloc, instance);
+}
+
+/* We need this to return our own trampoline functions */
+static PFN_vkVoidFunction
+kk_instance_get_proc_addr(const struct vk_instance *instance,
+ const struct vk_instance_entrypoint_table *entrypoints,
+ const char *name)
+{
+ PFN_vkVoidFunction func;
+
+ /* The Vulkan 1.0 spec for vkGetInstanceProcAddr has a table of exactly
+ * when we have to return valid function pointers, NULL, or it's left
+ * undefined. See the table for exact details.
+ */
+ if (name == NULL)
+ return NULL;
+
+#define LOOKUP_VK_ENTRYPOINT(entrypoint) \
+ if (strcmp(name, "vk" #entrypoint) == 0) \
+ return (PFN_vkVoidFunction)entrypoints->entrypoint
+
+ LOOKUP_VK_ENTRYPOINT(EnumerateInstanceExtensionProperties);
+ LOOKUP_VK_ENTRYPOINT(EnumerateInstanceLayerProperties);
+ LOOKUP_VK_ENTRYPOINT(EnumerateInstanceVersion);
+ LOOKUP_VK_ENTRYPOINT(CreateInstance);
+
+ /* GetInstanceProcAddr() can also be called with a NULL instance.
+ * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057
+ */
+ LOOKUP_VK_ENTRYPOINT(GetInstanceProcAddr);
+
+#undef LOOKUP_VK_ENTRYPOINT
+
+ /* Beginning with ICD interface v7, the following functions can also be
+ * retrieved via vk_icdGetInstanceProcAddr.
+ */
+
+ if (strcmp(name, "vk_icdNegotiateLoaderICDInterfaceVersion") == 0)
+ return (PFN_vkVoidFunction)vk_icdNegotiateLoaderICDInterfaceVersion;
+ if (strcmp(name, "vk_icdGetPhysicalDeviceProcAddr") == 0)
+ return (PFN_vkVoidFunction)vk_icdGetPhysicalDeviceProcAddr;
+#ifdef _WIN32
+ if (strcmp(name, "vk_icdEnumerateAdapterPhysicalDevices") == 0)
+ return (PFN_vkVoidFunction)vk_icdEnumerateAdapterPhysicalDevices;
+#endif
+
+ if (instance == NULL)
+ return NULL;
+
+ func = vk_instance_dispatch_table_get_if_supported(
+ &instance->dispatch_table, name, instance->app_info.api_version,
+ &instance->enabled_extensions);
+ if (func != NULL)
+ return func;
+
+ func = vk_physical_device_dispatch_table_get_if_supported(
+ &kk_physical_device_trampolines, name, instance->app_info.api_version,
+ &instance->enabled_extensions);
+ if (func != NULL)
+ return func;
+
+ func = vk_device_dispatch_table_get_if_supported(
+ &kk_device_trampolines, name, instance->app_info.api_version,
+ &instance->enabled_extensions, NULL);
+ if (func != NULL)
+ return func;
+
+ return NULL;
+}
+
+VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+kk_GetInstanceProcAddr(VkInstance _instance, const char *pName)
+{
+ VK_FROM_HANDLE(kk_instance, instance, _instance);
+ return kk_instance_get_proc_addr(&instance->vk, &kk_instance_entrypoints,
+ pName);
+}
+
+PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
+{
+ return kk_GetInstanceProcAddr(instance, pName);
+}
diff --git a/src/kosmickrisp/vulkan/kk_instance.h b/src/kosmickrisp/vulkan/kk_instance.h
new file mode 100644
index 00000000000..0afbb29a55e
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_instance.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_INSTANCE_H
+#define KK_INSTANCE_H 1
+
+#include "kk_private.h"
+
+#include "util/xmlconfig.h"
+#include "vk_instance.h"
+
+struct kk_instance {
+ struct vk_instance vk;
+
+ uint8_t driver_build_sha[20];
+ uint32_t force_vk_vendor;
+};
+
+VK_DEFINE_HANDLE_CASTS(kk_instance, vk.base, VkInstance,
+ VK_OBJECT_TYPE_INSTANCE)
+
+#endif // KK_INSTANCE_H
diff --git a/src/kosmickrisp/vulkan/kk_nir_lower_descriptors.c b/src/kosmickrisp/vulkan/kk_nir_lower_descriptors.c
new file mode 100644
index 00000000000..da076d41815
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_nir_lower_descriptors.c
@@ -0,0 +1,765 @@
+/*
+ * Copyright 2024 Valve Corporation
+ * Copyright 2024 Alyssa Rosenzweig
+ * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+#include "kk_cmd_buffer.h"
+#include "kk_descriptor_set.h"
+#include "kk_descriptor_set_layout.h"
+#include "kk_shader.h"
+
+#include "kosmickrisp/compiler/nir_to_msl.h"
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "nir_builder_opcodes.h"
+#include "nir_intrinsics.h"
+#include "nir_intrinsics_indices.h"
+#include "shader_enums.h"
+#include "vk_pipeline.h"
+
+#include "vulkan/vulkan_core.h"
+
+struct lower_descriptors_ctx {
+ const struct kk_descriptor_set_layout *set_layouts[KK_MAX_SETS];
+
+ bool clamp_desc_array_bounds;
+ nir_address_format ubo_addr_format;
+ nir_address_format ssbo_addr_format;
+};
+
+static const struct kk_descriptor_set_binding_layout *
+get_binding_layout(uint32_t set, uint32_t binding,
+ const struct lower_descriptors_ctx *ctx)
+{
+ assert(set < KK_MAX_SETS);
+ assert(ctx->set_layouts[set] != NULL);
+
+ const struct kk_descriptor_set_layout *set_layout = ctx->set_layouts[set];
+
+ assert(binding < set_layout->binding_count);
+ return &set_layout->binding[binding];
+}
+
+static nir_def *
+load_speculatable(nir_builder *b, unsigned num_components, unsigned bit_size,
+ nir_def *addr, unsigned align)
+{
+ return nir_build_load_global_constant(b, num_components, bit_size, addr,
+ .align_mul = align,
+ .access = ACCESS_CAN_SPECULATE);
+}
+
+static nir_def *
+load_root(nir_builder *b, unsigned num_components, unsigned bit_size,
+ nir_def *offset, unsigned align)
+{
+ nir_def *root = nir_load_buffer_ptr_kk(b, 1, 64, .binding = 0);
+
+ /* We've bound the address of the root descriptor, index in. */
+ nir_def *addr = nir_iadd(b, root, nir_u2u64(b, offset));
+
+ return load_speculatable(b, num_components, bit_size, addr, align);
+}
+
+static bool
+lower_load_constant(nir_builder *b, nir_intrinsic_instr *load,
+ const struct lower_descriptors_ctx *ctx)
+{
+ assert(load->intrinsic == nir_intrinsic_load_constant);
+ UNREACHABLE("todo: stick an address in the root descriptor or something");
+
+ uint32_t base = nir_intrinsic_base(load);
+ uint32_t range = nir_intrinsic_range(load);
+
+ b->cursor = nir_before_instr(&load->instr);
+
+ nir_def *offset = nir_iadd_imm(b, load->src[0].ssa, base);
+ nir_def *data = nir_load_ubo(
+ b, load->def.num_components, load->def.bit_size, nir_imm_int(b, 0),
+ offset, .align_mul = nir_intrinsic_align_mul(load),
+ .align_offset = nir_intrinsic_align_offset(load), .range_base = base,
+ .range = range);
+
+ nir_def_rewrite_uses(&load->def, data);
+
+ return true;
+}
+
+/* helper macro for computing root descriptor byte offsets */
+#define kk_root_descriptor_offset(member) \
+ offsetof(struct kk_root_descriptor_table, member)
+
+static nir_def *
+load_descriptor_set_addr(nir_builder *b, uint32_t set,
+ UNUSED const struct lower_descriptors_ctx *ctx)
+{
+ uint32_t set_addr_offset =
+ kk_root_descriptor_offset(sets) + set * sizeof(uint64_t);
+
+ return load_root(b, 1, 64, nir_imm_int(b, set_addr_offset), 8);
+}
+
+static nir_def *
+load_dynamic_buffer_start(nir_builder *b, uint32_t set,
+ const struct lower_descriptors_ctx *ctx)
+{
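+   /* If every lower-numbered set layout is known, the dynamic buffer start can
+    * be folded into an immediate; otherwise load it from the root descriptor
+    * table. */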
+ int dynamic_buffer_start_imm = 0;
+ for (uint32_t s = 0; s < set; s++) {
+ if (ctx->set_layouts[s] == NULL) {
+ dynamic_buffer_start_imm = -1;
+ break;
+ }
+
+ dynamic_buffer_start_imm += ctx->set_layouts[s]->dynamic_buffer_count;
+ }
+
+ if (dynamic_buffer_start_imm >= 0) {
+ return nir_imm_int(b, dynamic_buffer_start_imm);
+ } else {
+ uint32_t root_offset =
+ kk_root_descriptor_offset(set_dynamic_buffer_start) + set;
+
+ return nir_u2u32(b, load_root(b, 1, 8, nir_imm_int(b, root_offset), 1));
+ }
+}
+
+static nir_def *
+load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size,
+ uint32_t set, uint32_t binding, nir_def *index,
+ unsigned offset_B, const struct lower_descriptors_ctx *ctx)
+{
+ const struct kk_descriptor_set_binding_layout *binding_layout =
+ get_binding_layout(set, binding, ctx);
+
+ if (ctx->clamp_desc_array_bounds)
+ index =
+ nir_umin(b, index, nir_imm_int(b, binding_layout->array_size - 1));
+
+ switch (binding_layout->type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+ /* Get the index in the root descriptor table dynamic_buffers array. */
+ nir_def *dynamic_buffer_start = load_dynamic_buffer_start(b, set, ctx);
+
+ index = nir_iadd(b, index,
+ nir_iadd_imm(b, dynamic_buffer_start,
+ binding_layout->dynamic_buffer_index));
+
+ nir_def *root_desc_offset = nir_iadd_imm(
+ b, nir_imul_imm(b, index, sizeof(struct kk_buffer_address)),
+ kk_root_descriptor_offset(dynamic_buffers));
+
+ assert(num_components == 4 && bit_size == 32);
+ nir_def *desc = load_root(b, 4, 32, root_desc_offset, 16);
+
+      /* We know a priori that the .w component (offset) is zero */
+ return nir_vector_insert_imm(b, desc, nir_imm_int(b, 0), 3);
+ }
+
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: {
+ nir_def *base_addr = nir_iadd_imm(
+ b, load_descriptor_set_addr(b, set, ctx), binding_layout->offset);
+
+ assert(binding_layout->stride == 1);
+ const uint32_t binding_size = binding_layout->array_size;
+
+ /* Convert it to nir_address_format_64bit_bounded_global */
+ assert(num_components == 4 && bit_size == 32);
+ return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
+ nir_unpack_64_2x32_split_y(b, base_addr),
+ nir_imm_int(b, binding_size), nir_imm_int(b, 0));
+ }
+
+ default: {
+ assert(binding_layout->stride > 0);
+ nir_def *desc_ubo_offset =
+ nir_iadd_imm(b, nir_imul_imm(b, index, binding_layout->stride),
+ binding_layout->offset + offset_B);
+
+ unsigned desc_align_mul = (1 << (ffs(binding_layout->stride) - 1));
+ desc_align_mul = MIN2(desc_align_mul, 16);
+ unsigned desc_align_offset = binding_layout->offset + offset_B;
+ desc_align_offset %= desc_align_mul;
+
+ nir_def *desc;
+ nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
+ desc = nir_load_global_constant_offset(
+ b, num_components, bit_size, set_addr, desc_ubo_offset,
+ .align_mul = desc_align_mul, .align_offset = desc_align_offset,
+ .access = ACCESS_CAN_SPECULATE);
+
+ if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
+ binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
+         /* We know a priori that the .w component (offset) is zero */
+ assert(num_components == 4 && bit_size == 32);
+ desc = nir_vector_insert_imm(b, desc, nir_imm_int(b, 0), 3);
+ }
+ return desc;
+ }
+ }
+}
+
+static bool
+is_idx_intrin(nir_intrinsic_instr *intrin)
+{
+ while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
+ intrin = nir_src_as_intrinsic(intrin->src[0]);
+ if (intrin == NULL)
+ return false;
+ }
+
+ return intrin->intrinsic == nir_intrinsic_vulkan_resource_index;
+}
+
+static nir_def *
+load_descriptor_for_idx_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
+ const struct lower_descriptors_ctx *ctx)
+{
+ nir_def *index = nir_imm_int(b, 0);
+
+ while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) {
+ index = nir_iadd(b, index, intrin->src[1].ssa);
+ intrin = nir_src_as_intrinsic(intrin->src[0]);
+ }
+
+ assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
+ uint32_t set = nir_intrinsic_desc_set(intrin);
+ uint32_t binding = nir_intrinsic_binding(intrin);
+ index = nir_iadd(b, index, intrin->src[0].ssa);
+
+ return load_descriptor(b, 4, 32, set, binding, index, 0, ctx);
+}
+
+static bool
+try_lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
+ const struct lower_descriptors_ctx *ctx)
+{
+ ASSERTED const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
+ b->cursor = nir_before_instr(&intrin->instr);
+
+ nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(intrin->src[0]);
+ if (idx_intrin == NULL || !is_idx_intrin(idx_intrin)) {
+ assert(desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ||
+ desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC);
+ return false;
+ }
+
+ nir_def *desc = load_descriptor_for_idx_intrin(b, idx_intrin, ctx);
+
+ nir_def_rewrite_uses(&intrin->def, desc);
+
+ return true;
+}
+
+static bool
+_lower_sysval_to_root_table(nir_builder *b, nir_intrinsic_instr *intrin,
+ uint32_t root_table_offset)
+{
+ b->cursor = nir_instr_remove(&intrin->instr);
+ assert((root_table_offset & 3) == 0 && "aligned");
+
+ nir_def *val = load_root(b, intrin->def.num_components, intrin->def.bit_size,
+ nir_imm_int(b, root_table_offset), 4);
+
+ nir_def_rewrite_uses(&intrin->def, val);
+
+ return true;
+}
+
+#define lower_sysval_to_root_table(b, intrin, member) \
+ _lower_sysval_to_root_table(b, intrin, kk_root_descriptor_offset(member))
+
+static bool
+lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *load,
+ const struct lower_descriptors_ctx *ctx)
+{
+ const uint32_t push_region_offset = kk_root_descriptor_offset(push);
+ const uint32_t base = nir_intrinsic_base(load);
+
+ b->cursor = nir_before_instr(&load->instr);
+
+ nir_def *offset =
+ nir_iadd_imm(b, load->src[0].ssa, push_region_offset + base);
+
+ nir_def *val = load_root(b, load->def.num_components, load->def.bit_size,
+ offset, load->def.bit_size / 8);
+
+ nir_def_rewrite_uses(&load->def, val);
+
+ return true;
+}
+
+static void
+get_resource_deref_binding(nir_builder *b, nir_deref_instr *deref,
+ uint32_t *set, uint32_t *binding, nir_def **index)
+{
+ if (deref->deref_type == nir_deref_type_array) {
+ *index = deref->arr.index.ssa;
+ deref = nir_deref_instr_parent(deref);
+ } else {
+ *index = nir_imm_int(b, 0);
+ }
+
+ assert(deref->deref_type == nir_deref_type_var);
+ nir_variable *var = deref->var;
+
+ *set = var->data.descriptor_set;
+ *binding = var->data.binding;
+}
+
+static nir_def *
+load_resource_addr(nir_builder *b, unsigned num_components, unsigned bit_size,
+ nir_deref_instr *deref, unsigned offset_B,
+ const struct lower_descriptors_ctx *ctx)
+{
+ uint32_t set, binding;
+ nir_def *index;
+ get_resource_deref_binding(b, deref, &set, &binding, &index);
+
+ const struct kk_descriptor_set_binding_layout *binding_layout =
+ get_binding_layout(set, binding, ctx);
+
+ if (ctx->clamp_desc_array_bounds)
+ index =
+ nir_umin(b, index, nir_imm_int(b, binding_layout->array_size - 1));
+
+ assert(binding_layout->stride > 0);
+ nir_def *desc_ubo_offset =
+ nir_iadd_imm(b, nir_imul_imm(b, index, binding_layout->stride),
+ binding_layout->offset + offset_B);
+
+ return nir_iadd(b, load_descriptor_set_addr(b, set, ctx),
+ nir_u2u64(b, desc_ubo_offset));
+}
+
+static nir_def *
+load_resource_deref_desc(nir_builder *b, unsigned num_components,
+ unsigned bit_size, nir_deref_instr *deref,
+ unsigned offset_B,
+ const struct lower_descriptors_ctx *ctx)
+{
+ uint32_t set, binding;
+ nir_def *index;
+ get_resource_deref_binding(b, deref, &set, &binding, &index);
+ return load_descriptor(b, num_components, bit_size, set, binding, index,
+ offset_B, ctx);
+}
+
+static bool
+lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intr,
+ const struct lower_descriptors_ctx *ctx)
+{
+ b->cursor = nir_before_instr(&intr->instr);
+ nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
+
+   /* All image accesses are lowered using the image's GPU resource ID from
+    * the storage image descriptor. */
+ unsigned offs =
+ offsetof(struct kk_storage_image_descriptor, image_gpu_resource_id);
+
+ nir_def *resource_addr = load_resource_addr(b, 1, 64, deref, offs, ctx);
+
+ nir_alu_type type;
+ if (nir_intrinsic_has_atomic_op(intr)) {
+ type = nir_atomic_op_type(nir_intrinsic_atomic_op(intr));
+ type |= intr->src[3].ssa->bit_size;
+ } else if (nir_intrinsic_has_dest_type(intr)) {
+ type = nir_intrinsic_dest_type(intr);
+ } else if (nir_intrinsic_has_src_type(intr)) {
+ type = nir_intrinsic_src_type(intr);
+ } else {
+ type = nir_type_uint32;
+ }
+
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+ nir_def *handle = nir_load_texture_handle_kk(
+ b, 1, 64, resource_addr, .dest_type = type,
+ .image_dim = nir_intrinsic_image_dim(intr),
+ .image_array = nir_intrinsic_image_array(intr),
+ .flags = msl_convert_access_flag(var->data.access));
+
+ nir_rewrite_image_intrinsic(intr, handle, true);
+
+ return true;
+}
+
+static bool
+try_lower_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
+ const struct lower_descriptors_ctx *ctx)
+{
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_constant:
+ return lower_load_constant(b, intrin, ctx);
+
+ case nir_intrinsic_load_vulkan_descriptor:
+ return try_lower_load_vulkan_descriptor(b, intrin, ctx);
+
+ case nir_intrinsic_load_workgroup_size:
+ UNREACHABLE("Should have been lowered by nir_lower_cs_intrinsics()");
+
+ case nir_intrinsic_load_base_workgroup_id:
+ return lower_sysval_to_root_table(b, intrin, cs.base_group);
+
+ case nir_intrinsic_load_blend_const_color_rgba:
+ return lower_sysval_to_root_table(b, intrin, draw.blend_constant);
+
+ case nir_intrinsic_load_push_constant:
+ return lower_load_push_constant(b, intrin, ctx);
+
+ case nir_intrinsic_image_deref_load:
+ case nir_intrinsic_image_deref_sparse_load:
+ case nir_intrinsic_image_deref_store:
+ case nir_intrinsic_image_deref_atomic:
+ case nir_intrinsic_image_deref_atomic_swap:
+ case nir_intrinsic_image_deref_size:
+ case nir_intrinsic_image_deref_samples:
+ case nir_intrinsic_image_deref_store_block_agx:
+ return lower_image_intrin(b, intrin, ctx);
+
+ default:
+ return false;
+ }
+}
+
+static bool
+lower_tex(nir_builder *b, nir_tex_instr *tex,
+ const struct lower_descriptors_ctx *ctx)
+{
+ b->cursor = nir_before_instr(&tex->instr);
+
+ nir_def *texture = nir_steal_tex_src(tex, nir_tex_src_texture_deref);
+ nir_def *sampler = nir_steal_tex_src(tex, nir_tex_src_sampler_deref);
+ if (!texture) {
+ assert(!sampler);
+ return false;
+ }
+
+ nir_def *plane_ssa = nir_steal_tex_src(tex, nir_tex_src_plane);
+ const uint32_t plane =
+ plane_ssa ? nir_src_as_uint(nir_src_for_ssa(plane_ssa)) : 0;
+ const uint64_t plane_offset_B =
+ plane * sizeof(struct kk_sampled_image_descriptor);
+
+ /* LOD bias is passed in the descriptor set, rather than embedded into
+ * the sampler descriptor. There's no spot in the hardware descriptor,
+ * plus this saves on precious sampler heap spots.
+ */
+ if (tex->op == nir_texop_lod_bias) {
+ unsigned offs =
+ offsetof(struct kk_sampled_image_descriptor, lod_bias_fp16);
+
+ nir_def *bias = load_resource_deref_desc(
+ b, 1, 16, nir_src_as_deref(nir_src_for_ssa(sampler)),
+ plane_offset_B + offs, ctx);
+
+ nir_def_replace(&tex->def, bias);
+ return true;
+ }
+
+ // if (tex->op == nir_texop_image_min_lod_agx) {
+ // assert(tex->dest_type == nir_type_float16 ||
+ // tex->dest_type == nir_type_uint16);
+
+ // unsigned offs =
+ // tex->dest_type == nir_type_float16
+ // ? offsetof(struct kk_sampled_image_descriptor, min_lod_fp16)
+ // : offsetof(struct kk_sampled_image_descriptor, min_lod_uint16);
+
+ // nir_def *min = load_resource_deref_desc(
+ // b, 1, 16, nir_src_as_deref(nir_src_for_ssa(texture)),
+ // plane_offset_B + offs, ctx);
+
+ // nir_def_replace(&tex->def, min);
+ // return true;
+ // }
+
+ // if (tex->op == nir_texop_has_custom_border_color_agx) {
+ // unsigned offs = offsetof(struct kk_sampled_image_descriptor,
+ // clamp_0_sampler_index_or_negative);
+
+ // nir_def *res = load_resource_deref_desc(
+ // b, 1, 16, nir_src_as_deref(nir_src_for_ssa(sampler)),
+ // plane_offset_B + offs, ctx);
+
+ // nir_def_replace(&tex->def, nir_ige_imm(b, res, 0));
+ // return true;
+ // }
+
+ if (tex->op == nir_texop_custom_border_color_agx) {
+ unsigned offs = offsetof(struct kk_sampled_image_descriptor, border);
+
+ nir_def *border = load_resource_deref_desc(
+ b, 4, 32, nir_src_as_deref(nir_src_for_ssa(sampler)),
+ plane_offset_B + offs, ctx);
+
+ nir_alu_type T = nir_alu_type_get_base_type(tex->dest_type);
+ border = nir_convert_to_bit_size(b, border, T, tex->def.bit_size);
+
+ nir_def_replace(&tex->def, border);
+ return true;
+ }
+
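+   /* Texture: build a bindless texture handle from the GPU resource ID stored
+    * in the descriptor. */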
+ {
+ unsigned offs =
+ offsetof(struct kk_sampled_image_descriptor, image_gpu_resource_id);
+
+ nir_def *resource_addr = load_resource_addr(
+ b, 1, 64, nir_src_as_deref(nir_src_for_ssa(texture)),
+ plane_offset_B + offs, ctx);
+
+ nir_def *handle = NULL;
+ if (tex->is_shadow) {
+ handle = nir_load_depth_texture_kk(b, 1, 64, resource_addr,
+ .image_dim = tex->sampler_dim,
+ .image_array = tex->is_array);
+ } else {
+ handle = nir_load_texture_handle_kk(
+ b, 1, 64, resource_addr, .dest_type = tex->dest_type,
+ .image_dim = tex->sampler_dim, .image_array = tex->is_array);
+ }
+ nir_tex_instr_add_src(tex, nir_tex_src_texture_handle, handle);
+ }
+
+ if (sampler != NULL) {
+ unsigned offs =
+ offsetof(struct kk_sampled_image_descriptor, sampler_index);
+
+ nir_def *index = load_resource_deref_desc(
+ b, 1, 16, nir_src_as_deref(nir_src_for_ssa(sampler)),
+ plane_offset_B + offs, ctx);
+
+ nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle,
+ nir_load_sampler_handle_kk(b, index));
+ }
+
+ if (tex->op == nir_texop_lod) {
+ nir_def *lod_min = nir_f2f32(
+ b, load_resource_deref_desc(
+ b, 1, 16, nir_src_as_deref(nir_src_for_ssa(sampler)),
+ plane_offset_B +
+ offsetof(struct kk_sampled_image_descriptor, lod_min_fp16),
+ ctx));
+ nir_def *lod_max = nir_f2f32(
+ b, load_resource_deref_desc(
+ b, 1, 16, nir_src_as_deref(nir_src_for_ssa(sampler)),
+ plane_offset_B +
+ offsetof(struct kk_sampled_image_descriptor, lod_max_fp16),
+ ctx));
+
+ nir_tex_instr_add_src(tex, nir_tex_src_min_lod, lod_min);
+ nir_tex_instr_add_src(tex, nir_tex_src_max_lod_kk, lod_max);
+ }
+
+ return true;
+}
+
+static bool
+try_lower_descriptors_instr(nir_builder *b, nir_instr *instr, void *_data)
+{
+ const struct lower_descriptors_ctx *ctx = _data;
+
+ switch (instr->type) {
+ case nir_instr_type_tex:
+ return lower_tex(b, nir_instr_as_tex(instr), ctx);
+ case nir_instr_type_intrinsic:
+ return try_lower_intrin(b, nir_instr_as_intrinsic(instr), ctx);
+ default:
+ return false;
+ }
+}
+
+static bool
+lower_ssbo_resource_index(nir_builder *b, nir_intrinsic_instr *intrin,
+ const struct lower_descriptors_ctx *ctx)
+{
+ const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
+ if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
+ desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+ return false;
+
+ b->cursor = nir_instr_remove(&intrin->instr);
+
+ uint32_t set = nir_intrinsic_desc_set(intrin);
+ uint32_t binding = nir_intrinsic_binding(intrin);
+ nir_def *index = intrin->src[0].ssa;
+
+ const struct kk_descriptor_set_binding_layout *binding_layout =
+ get_binding_layout(set, binding, ctx);
+
+ nir_def *binding_addr;
+ uint8_t binding_stride;
+ switch (binding_layout->type) {
+ case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
+ nir_def *set_addr = load_descriptor_set_addr(b, set, ctx);
+ binding_addr = nir_iadd_imm(b, set_addr, binding_layout->offset);
+ binding_stride = binding_layout->stride;
+ break;
+ }
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+ nir_def *root_desc_addr = nir_load_buffer_ptr_kk(b, 1, 64, .binding = 0);
+
+ nir_def *dynamic_buffer_start =
+ nir_iadd_imm(b, load_dynamic_buffer_start(b, set, ctx),
+ binding_layout->dynamic_buffer_index);
+
+ nir_def *dynamic_binding_offset =
+ nir_iadd_imm(b,
+ nir_imul_imm(b, dynamic_buffer_start,
+ sizeof(struct kk_buffer_address)),
+ kk_root_descriptor_offset(dynamic_buffers));
+
+ binding_addr =
+ nir_iadd(b, root_desc_addr, nir_u2u64(b, dynamic_binding_offset));
+ binding_stride = sizeof(struct kk_buffer_address);
+ break;
+ }
+
+ default:
+ UNREACHABLE("Not an SSBO descriptor");
+ }
+
+ /* Tuck the stride in the top 8 bits of the binding address */
+ binding_addr = nir_ior_imm(b, binding_addr, (uint64_t)binding_stride << 56);
+
+ const uint32_t binding_size = binding_layout->array_size * binding_stride;
+ nir_def *offset_in_binding = nir_imul_imm(b, index, binding_stride);
+
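+   /* Descriptor layout is (addr_lo, addr_hi, size, offset); the stride sits in
+    * the top 8 bits of the address and is masked off again when the descriptor
+    * is loaded. */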
+ nir_def *addr = nir_vec4(b, nir_unpack_64_2x32_split_x(b, binding_addr),
+ nir_unpack_64_2x32_split_y(b, binding_addr),
+ nir_imm_int(b, binding_size), offset_in_binding);
+
+ nir_def_rewrite_uses(&intrin->def, addr);
+
+ return true;
+}
+
+static bool
+lower_ssbo_resource_reindex(nir_builder *b, nir_intrinsic_instr *intrin,
+ const struct lower_descriptors_ctx *ctx)
+{
+ const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
+ if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
+ desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+ return false;
+
+ b->cursor = nir_instr_remove(&intrin->instr);
+
+ nir_def *addr = intrin->src[0].ssa;
+ nir_def *index = intrin->src[1].ssa;
+
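+   /* The binding stride was tucked into bits 56-63 of the binding address,
+    * i.e. the top 8 bits of the high 32-bit word. */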
+ nir_def *addr_high32 = nir_channel(b, addr, 1);
+ nir_def *stride = nir_ushr_imm(b, addr_high32, 24);
+ nir_def *offset = nir_imul(b, index, stride);
+
+ addr = nir_build_addr_iadd(b, addr, ctx->ssbo_addr_format, nir_var_mem_ssbo,
+ offset);
+ nir_def_rewrite_uses(&intrin->def, addr);
+
+ return true;
+}
+
+static bool
+lower_load_ssbo_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
+ const struct lower_descriptors_ctx *ctx)
+{
+ const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
+ if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER &&
+ desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+ return false;
+
+ b->cursor = nir_instr_remove(&intrin->instr);
+
+ nir_def *addr = intrin->src[0].ssa;
+
+ nir_def *desc;
+ switch (ctx->ssbo_addr_format) {
+ case nir_address_format_64bit_global_32bit_offset: {
+ nir_def *base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
+ nir_def *offset = nir_channel(b, addr, 3);
+ /* Mask off the binding stride */
+ base = nir_iand_imm(b, base, BITFIELD64_MASK(56));
+ desc = nir_load_global_constant_offset(b, 4, 32, base, offset,
+ .align_mul = 16, .align_offset = 0,
+ .access = ACCESS_CAN_SPECULATE);
+ break;
+ }
+
+ case nir_address_format_64bit_bounded_global: {
+ nir_def *base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2));
+ nir_def *size = nir_channel(b, addr, 2);
+ nir_def *offset = nir_channel(b, addr, 3);
+ /* Mask off the binding stride */
+ base = nir_iand_imm(b, base, BITFIELD64_MASK(56));
+ desc = nir_load_global_constant_bounded(
+ b, 4, 32, base, offset, size, .align_mul = 16, .align_offset = 0,
+ .access = ACCESS_CAN_SPECULATE);
+ break;
+ }
+
+ default:
+ UNREACHABLE("Unknown address mode");
+ }
+
+ nir_def_rewrite_uses(&intrin->def, desc);
+
+ return true;
+}
+
+static bool
+lower_ssbo_descriptor(nir_builder *b, nir_intrinsic_instr *intr, void *_data)
+{
+ const struct lower_descriptors_ctx *ctx = _data;
+
+ switch (intr->intrinsic) {
+ case nir_intrinsic_vulkan_resource_index:
+ return lower_ssbo_resource_index(b, intr, ctx);
+ case nir_intrinsic_vulkan_resource_reindex:
+ return lower_ssbo_resource_reindex(b, intr, ctx);
+ case nir_intrinsic_load_vulkan_descriptor:
+ return lower_load_ssbo_descriptor(b, intr, ctx);
+ default:
+ return false;
+ }
+}
+
+bool
+kk_nir_lower_descriptors(nir_shader *nir,
+ const struct vk_pipeline_robustness_state *rs,
+ uint32_t set_layout_count,
+ struct vk_descriptor_set_layout *const *set_layouts)
+{
+ struct lower_descriptors_ctx ctx = {
+ .clamp_desc_array_bounds =
+ rs->storage_buffers !=
+ VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
+ rs->uniform_buffers !=
+ VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT ||
+ rs->images != VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT,
+ .ssbo_addr_format = kk_buffer_addr_format(rs->storage_buffers),
+ .ubo_addr_format = kk_buffer_addr_format(rs->uniform_buffers),
+ };
+
+ assert(set_layout_count <= KK_MAX_SETS);
+ for (uint32_t s = 0; s < set_layout_count; s++) {
+ if (set_layouts[s] != NULL)
+ ctx.set_layouts[s] = vk_to_kk_descriptor_set_layout(set_layouts[s]);
+ }
+
+ /* First lower everything but complex SSBOs, then lower complex SSBOs.
+ *
+ * TODO: See if we can unify this, not sure if the fast path matters on
+ * Apple. This is inherited from NVK.
+ */
+ bool pass_lower_descriptors = nir_shader_instructions_pass(
+ nir, try_lower_descriptors_instr, nir_metadata_control_flow, &ctx);
+
+ bool pass_lower_ssbo = nir_shader_intrinsics_pass(
+ nir, lower_ssbo_descriptor, nir_metadata_control_flow, &ctx);
+
+ return pass_lower_descriptors || pass_lower_ssbo;
+}
diff --git a/src/kosmickrisp/vulkan/kk_nir_lower_multiview.c b/src/kosmickrisp/vulkan/kk_nir_lower_multiview.c
new file mode 100644
index 00000000000..f90e50339f2
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_nir_lower_multiview.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_shader.h"
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/* Without multiview, the view index is always 0 */
+static bool
+replace_view_index_with_zero(nir_builder *b, nir_intrinsic_instr *instr,
+ void *data)
+{
+ if (instr->intrinsic != nir_intrinsic_load_view_index)
+ return false;
+
+ b->cursor = nir_before_instr(&instr->instr);
+   nir_def_replace(&instr->def, nir_imm_int(b, 0));
+ return true;
+}
+
+/* View index maps to layer id in Metal */
+static bool
+replace_view_index_with_layer_id(nir_builder *b, nir_intrinsic_instr *instr,
+ void *data)
+{
+ if (instr->intrinsic != nir_intrinsic_load_view_index)
+ return false;
+
+ b->cursor = nir_before_instr(&instr->instr);
+ nir_def *layer_id = nir_load_layer_id(b);
+ nir_def_replace(&instr->def, layer_id);
+ return true;
+}
+
+static bool
+replace_view_id_with_value(nir_builder *b, nir_intrinsic_instr *instr,
+ void *data)
+{
+ if (instr->intrinsic != nir_intrinsic_load_view_index)
+ return false;
+
+ b->cursor = nir_before_instr(&instr->instr);
+ nir_def *view_index = (nir_def *)data;
+ nir_def_replace(&instr->def, view_index);
+ return true;
+}
+
+bool
+kk_nir_lower_vs_multiview(nir_shader *nir, uint32_t view_mask)
+{
+ assert(nir->info.stage == MESA_SHADER_VERTEX);
+
+ /* Embed view indices and return */
+ uint32_t view_count = util_bitcount(view_mask);
+ nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);
+ nir_builder b = nir_builder_at(nir_before_impl(entrypoint));
+
+ /* Create array and initialize */
+ nir_variable *view_indices = nir_local_variable_create(
+ entrypoint, glsl_array_type(glsl_uint_type(), view_count, 0),
+ "view_indices");
+ nir_deref_instr *view_indices_deref = nir_build_deref_var(&b, view_indices);
+ uint32_t count = 0u;
+ u_foreach_bit(index, view_mask) {
+ nir_store_deref(
+ &b, nir_build_deref_array_imm(&b, view_indices_deref, count++),
+ nir_imm_int(&b, index), 1);
+ }
+
+ /* Access array based on the amplification id */
+ nir_def *amplification_id = nir_load_amplification_id_kk(&b);
+ nir_def *view_index = nir_load_deref(
+ &b, nir_build_deref_array(&b, view_indices_deref, amplification_id));
+
+ bool progress = nir_shader_intrinsics_pass(
+ nir, replace_view_id_with_value, nir_metadata_control_flow, view_index);
+
+ if (progress) {
+ BITSET_SET(nir->info.system_values_read,
+ SYSTEM_VALUE_AMPLIFICATION_ID_KK);
+ }
+
+ /* With a single view index, Metal's vertex amplification will disregard the
+    * render target offset. We need to apply it ourselves in the shader. */
+ if (view_count == 1u) {
+ nir_variable *layer_id = nir_create_variable_with_location(
+ nir, nir_var_shader_out, VARYING_SLOT_LAYER, glsl_uint_type());
+ nir_deref_instr *layer_id_deref = nir_build_deref_var(&b, layer_id);
+ nir_def *view_index = nir_imm_int(&b, util_last_bit(view_mask) - 1u);
+ nir_store_deref(&b, layer_id_deref, view_index, 0xFFFFFFFF);
+
+ nir->info.outputs_written |= BITFIELD64_BIT(VARYING_SLOT_LAYER);
+ progress = true;
+ }
+
+ return progress;
+}
+
+bool
+kk_nir_lower_fs_multiview(nir_shader *nir, uint32_t view_mask)
+{
+ if (view_mask == 0u)
+ return nir_shader_intrinsics_pass(nir, replace_view_index_with_zero,
+ nir_metadata_control_flow, NULL);
+
+ return nir_shader_intrinsics_pass(nir, replace_view_index_with_layer_id,
+ nir_metadata_control_flow, NULL);
+}
diff --git a/src/kosmickrisp/vulkan/kk_nir_lower_textures.c b/src/kosmickrisp/vulkan/kk_nir_lower_textures.c
new file mode 100644
index 00000000000..85339be414c
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_nir_lower_textures.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright 2023 Valve Corporation
+ * Copyright 2021 Alyssa Rosenzweig
+ * Copyright 2020 Collabora Ltd.
+ * Copyright 2016 Broadcom
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+#include "kk_private.h"
+
+#include "kk_descriptor_types.h"
+#include "kk_shader.h"
+
+#include "nir.h"
+#include "nir_builder.h"
+
+#include "stdbool.h"
+
+static bool
+lower_texture_buffer_tex_instr(nir_builder *b, nir_tex_instr *tex)
+{
+ if (tex->sampler_dim != GLSL_SAMPLER_DIM_BUF)
+ return false;
+
+ nir_steal_tex_src(tex, nir_tex_src_lod);
+ return true;
+}
+
+static void
+lower_1d_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin)
+{
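+   /* 1D images are lowered to 2D: insert a zero Y coordinate and, for arrays,
+    * move the layer index to the third component. */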
+ nir_def *coord = intrin->src[1].ssa;
+ bool is_array = nir_intrinsic_image_array(intrin);
+ nir_def *zero = nir_imm_intN_t(b, 0, coord->bit_size);
+
+ if (is_array) {
+ assert(coord->num_components >= 2);
+ coord =
+ nir_vec3(b, nir_channel(b, coord, 0), zero, nir_channel(b, coord, 1));
+ } else {
+ assert(coord->num_components >= 1);
+ coord = nir_vec2(b, coord, zero);
+ }
+
+ nir_src_rewrite(&intrin->src[1], nir_pad_vector(b, coord, 4));
+ nir_intrinsic_set_image_dim(intrin, GLSL_SAMPLER_DIM_2D);
+}
+
+static nir_def *
+txs_for_image(nir_builder *b, nir_intrinsic_instr *intr,
+ unsigned num_components, unsigned bit_size, bool query_samples)
+{
+ nir_tex_instr *tex = nir_tex_instr_create(b->shader, query_samples ? 1 : 2);
+ tex->op = query_samples ? nir_texop_texture_samples : nir_texop_txs;
+ tex->is_array = nir_intrinsic_image_array(intr);
+ tex->dest_type = nir_type_uint32;
+ tex->sampler_dim = nir_intrinsic_image_dim(intr);
+
+ tex->src[0] =
+ nir_tex_src_for_ssa(nir_tex_src_texture_handle, intr->src[0].ssa);
+
+ if (!query_samples)
+ tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_lod, intr->src[1].ssa);
+
+ nir_def_init(&tex->instr, &tex->def, num_components, bit_size);
+ nir_builder_instr_insert(b, &tex->instr);
+ nir_def *res = &tex->def;
+
+   /* Cube images are implemented as 2D arrays, so we need to divide the layer
+    * count by 6 here. */
+ if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && res->num_components > 2 &&
+ !query_samples) {
+ nir_def *divided = nir_udiv_imm(b, nir_channel(b, res, 2), 6);
+ res = nir_vector_insert_imm(b, res, divided, 2);
+ }
+
+ return res;
+}
+
+/* Cube textures need to be loaded as cube textures for sampling, but for
+ * storage we need to load them as a 2D array since Metal does not support
+ * atomics on cube images. However, we don't know how the texture will be used
+ * when we load the handle, so we patch the handle's dimensionality at the
+ * point of use. */
+static void
+lower_cube_load_handle_to_2d_array(nir_def *handle)
+{
+ nir_instr *handle_parent = handle->parent_instr;
+ assert(handle_parent->type == nir_instr_type_intrinsic);
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(handle_parent);
+ assert(intrin->intrinsic == nir_intrinsic_load_texture_handle_kk);
+ assert(nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE);
+ nir_intrinsic_set_image_dim(intrin, GLSL_SAMPLER_DIM_2D);
+ nir_intrinsic_set_image_array(intrin, true);
+}
+
+static void
+lower_cube_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin)
+{
+ assert(nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE);
+ nir_def *coord = intrin->src[1].ssa;
+ if (nir_intrinsic_image_array(intrin)) {
+ assert(coord->num_components >= 4);
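+      /* Flatten (face, array layer) into a single 2D-array layer:
+       * layer * 6 + face. */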
+ nir_def *layer_index =
+ nir_iadd(b, nir_channel(b, coord, 2),
+ nir_imul_imm(b, nir_channel(b, coord, 3), 6));
+ coord = nir_vec4(b, nir_channel(b, coord, 0), nir_channel(b, coord, 1),
+ layer_index, nir_imm_intN_t(b, 0, coord->bit_size));
+ }
+ nir_src_rewrite(&intrin->src[1], nir_pad_vector(b, coord, 4));
+ nir_intrinsic_set_image_dim(intrin, GLSL_SAMPLER_DIM_2D);
+ nir_intrinsic_set_image_array(intrin, true);
+
+ lower_cube_load_handle_to_2d_array(intrin->src[0].ssa);
+}
+
+static bool
+lower_image_load_store(nir_builder *b, nir_intrinsic_instr *intrin)
+{
+ b->cursor = nir_before_instr(&intrin->instr);
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_texture_handle_kk:
+ switch (nir_intrinsic_image_dim(intrin)) {
+ case GLSL_SAMPLER_DIM_1D:
+ nir_intrinsic_set_image_dim(intrin, GLSL_SAMPLER_DIM_2D);
+ return true;
+ default:
+ return false;
+ }
+ case nir_intrinsic_image_load:
+ case nir_intrinsic_image_store:
+ case nir_intrinsic_image_atomic:
+ case nir_intrinsic_image_atomic_swap:
+ case nir_intrinsic_bindless_image_load:
+ case nir_intrinsic_bindless_image_sparse_load:
+ case nir_intrinsic_bindless_image_store:
+ case nir_intrinsic_bindless_image_atomic:
+ case nir_intrinsic_bindless_image_atomic_swap:
+ switch (nir_intrinsic_image_dim(intrin)) {
+ case GLSL_SAMPLER_DIM_1D:
+ lower_1d_image_intrin(b, intrin);
+ return true;
+ case GLSL_SAMPLER_DIM_CUBE:
+ lower_cube_image_intrin(b, intrin);
+ return true;
+ default:
+ return false;
+ }
+ case nir_intrinsic_bindless_image_size:
+ case nir_intrinsic_bindless_image_samples:
+ nir_def_rewrite_uses(
+ &intrin->def,
+ txs_for_image(
+ b, intrin, intrin->def.num_components, intrin->def.bit_size,
+ intrin->intrinsic == nir_intrinsic_bindless_image_samples));
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool
+lower_image(nir_builder *b, nir_instr *instr)
+{
+ if (instr->type == nir_instr_type_tex) {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ return lower_texture_buffer_tex_instr(b, tex);
+ } else if (instr->type == nir_instr_type_intrinsic) {
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ return lower_image_load_store(b, intrin);
+ }
+
+ return false;
+}
+
+/* Must go after descriptor lowering to ensure the instructions we introduce
+ * are also lowered */
+bool
+kk_nir_lower_textures(nir_shader *nir)
+{
+ bool progress = false;
+ nir_foreach_function_impl(impl, nir) {
+ nir_foreach_block_safe(block, impl) {
+ nir_builder b = nir_builder_create(impl);
+ bool progress_impl = false;
+ nir_foreach_instr_safe(instr, block) {
+ progress_impl |= lower_image(&b, instr);
+ }
+ progress |=
+ nir_progress(progress_impl, impl, nir_metadata_control_flow);
+ }
+ }
+ return progress;
+}
diff --git a/src/kosmickrisp/vulkan/kk_nir_lower_vbo.c b/src/kosmickrisp/vulkan/kk_nir_lower_vbo.c
new file mode 100644
index 00000000000..7dc3764a6b2
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_nir_lower_vbo.c
@@ -0,0 +1,279 @@
+/*
+ * Copyright 2022 Alyssa Rosenzweig
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_nir_lower_vbo.h"
+
+#include "kk_cmd_buffer.h"
+
+#include "compiler/nir/nir_builder.h"
+#include "compiler/nir/nir_format_convert.h"
+#include "util/bitset.h"
+#include "util/u_math.h"
+#include "shader_enums.h"
+
+struct ctx {
+ struct kk_attribute *attribs;
+ bool requires_vertex_id;
+ bool requires_instance_id;
+ bool requires_base_instance;
+};
+
+static bool
+is_rgb10_a2(const struct util_format_description *desc)
+{
+ return desc->channel[0].shift == 0 && desc->channel[0].size == 10 &&
+ desc->channel[1].shift == 10 && desc->channel[1].size == 10 &&
+ desc->channel[2].shift == 20 && desc->channel[2].size == 10 &&
+ desc->channel[3].shift == 30 && desc->channel[3].size == 2;
+}
+
+static bool
+is_rg11_b10(const struct util_format_description *desc)
+{
+ return desc->channel[0].shift == 0 && desc->channel[0].size == 11 &&
+ desc->channel[1].shift == 11 && desc->channel[1].size == 11 &&
+ desc->channel[2].shift == 22 && desc->channel[2].size == 10;
+}
+
+static enum pipe_format
+kk_vbo_internal_format(enum pipe_format format)
+{
+ const struct util_format_description *desc = util_format_description(format);
+
+ /* RGB10A2 and RG11B10 require loading as uint and then unpack */
+ if (is_rgb10_a2(desc) || is_rg11_b10(desc))
+ return PIPE_FORMAT_R32_UINT;
+
+ /* R11G11B10F is native and special */
+ if (format == PIPE_FORMAT_R11G11B10_FLOAT)
+ return format;
+
+ /* No other non-array formats handled */
+ if (!desc->is_array)
+ return PIPE_FORMAT_NONE;
+
+ /* Otherwise look at one (any) channel */
+ int idx = util_format_get_first_non_void_channel(format);
+ if (idx < 0)
+ return PIPE_FORMAT_NONE;
+
+ /* We only handle RGB formats (we could do SRGB if we wanted though?) */
+ if ((desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB) ||
+ (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN))
+ return PIPE_FORMAT_NONE;
+
+ /* We have native 8-bit and 16-bit normalized formats */
+ struct util_format_channel_description chan = desc->channel[idx];
+
+   /* Map to the unsigned integer format of the matching channel size */
+ switch (chan.size) {
+ case 32:
+ return PIPE_FORMAT_R32_UINT;
+ case 16:
+ return PIPE_FORMAT_R16_UINT;
+ case 8:
+ return PIPE_FORMAT_R8_UINT;
+ default:
+ return PIPE_FORMAT_NONE;
+ }
+}
+
+bool
+kk_vbo_supports_format(enum pipe_format format)
+{
+ return kk_vbo_internal_format(format) != PIPE_FORMAT_NONE;
+}
+
+static nir_def *
+apply_swizzle_channel(nir_builder *b, nir_def *vec, unsigned swizzle,
+ bool is_int)
+{
+ switch (swizzle) {
+ case PIPE_SWIZZLE_X:
+ return nir_channel(b, vec, 0);
+ case PIPE_SWIZZLE_Y:
+ return nir_channel(b, vec, 1);
+ case PIPE_SWIZZLE_Z:
+ return nir_channel(b, vec, 2);
+ case PIPE_SWIZZLE_W:
+ return nir_channel(b, vec, 3);
+ case PIPE_SWIZZLE_0:
+ return nir_imm_intN_t(b, 0, vec->bit_size);
+ case PIPE_SWIZZLE_1:
+ return is_int ? nir_imm_intN_t(b, 1, vec->bit_size)
+ : nir_imm_floatN_t(b, 1.0, vec->bit_size);
+ default:
+ UNREACHABLE("Invalid swizzle channel");
+ }
+}
+
+static bool
+pass(struct nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+ if (intr->intrinsic != nir_intrinsic_load_input)
+ return false;
+
+ struct ctx *ctx = data;
+ struct kk_attribute *attribs = ctx->attribs;
+ b->cursor = nir_instr_remove(&intr->instr);
+
+ nir_src *offset_src = nir_get_io_offset_src(intr);
+ assert(nir_src_is_const(*offset_src) && "no attribute indirects");
+ unsigned index = nir_intrinsic_base(intr) + nir_src_as_uint(*offset_src);
+
+ struct kk_attribute attrib = attribs[index];
+
+ const struct util_format_description *desc =
+ util_format_description(attrib.format);
+ int chan = util_format_get_first_non_void_channel(attrib.format);
+ assert(chan >= 0);
+
+ bool is_float = desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT;
+ bool is_unsigned = desc->channel[chan].type == UTIL_FORMAT_TYPE_UNSIGNED;
+ bool is_signed = desc->channel[chan].type == UTIL_FORMAT_TYPE_SIGNED;
+ bool is_fixed = desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED;
+ bool is_int = util_format_is_pure_integer(attrib.format);
+
+ assert((is_float ^ is_unsigned ^ is_signed ^ is_fixed) && "Invalid format");
+
+ enum pipe_format interchange_format = kk_vbo_internal_format(attrib.format);
+ assert(interchange_format != PIPE_FORMAT_NONE);
+
+ unsigned interchange_align = util_format_get_blocksize(interchange_format);
+ unsigned interchange_comps = util_format_get_nr_components(attrib.format);
+
+ /* In the hardware, uint formats zero-extend and float formats convert.
+ * However, non-uint formats using a uint interchange format shouldn't be
+ * zero extended.
+ */
+ unsigned interchange_register_size =
+ util_format_is_pure_uint(interchange_format) ? (interchange_align * 8)
+ : intr->def.bit_size;
+
+ /* Non-UNORM R10G10B10A2 loaded as a scalar and unpacked */
+ if (interchange_format == PIPE_FORMAT_R32_UINT && !desc->is_array)
+ interchange_comps = 1;
+
+ /* Calculate the element to fetch the vertex for. Divide the instance ID by
+ * the divisor for per-instance data. Divisor=0 specifies per-vertex data.
+ */
+ nir_def *el;
+ if (attrib.instanced) {
+ if (attrib.divisor > 0) {
+ /* Metal's instance_id has base_instance included */
+ nir_def *instance_id =
+ nir_isub(b, nir_load_instance_id(b), nir_load_base_instance(b));
+ el = nir_udiv_imm(b, instance_id, attrib.divisor);
+ ctx->requires_instance_id = true;
+ } else
+ el = nir_imm_int(b, 0);
+
+ el = nir_iadd(b, el, nir_load_base_instance(b));
+ ctx->requires_base_instance = true;
+
+ BITSET_SET(b->shader->info.system_values_read,
+ SYSTEM_VALUE_BASE_INSTANCE);
+ } else {
+ el = nir_load_vertex_id(b);
+ ctx->requires_vertex_id = true;
+ }
+
+ /* Load the pointer of the buffer from the argument buffer */
+ nir_def *argbuf = nir_load_buffer_ptr_kk(b, 1, 64, .binding = 0);
+ uint64_t attrib_base_offset =
+ offsetof(struct kk_root_descriptor_table, draw.attrib_base[index]);
+ nir_def *base = nir_load_global_constant(
+ b, nir_iadd_imm(b, argbuf, attrib_base_offset), 8, 1, 64);
+
+ uint64_t buffer_stride_offset = offsetof(
+ struct kk_root_descriptor_table, draw.buffer_strides[attrib.binding]);
+ nir_def *stride = nir_load_global_constant(
+ b, nir_iadd_imm(b, argbuf, buffer_stride_offset), 4, 1, 32);
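+ /* The stride from the root table is in bytes; divide by the interchange
+ * block size so the per-element index below is expressed in interchange
+ * format elements.
+ */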
+ nir_def *stride_offset_el =
+ nir_imul(b, el, nir_udiv_imm(b, stride, interchange_align));
+
+ /* Load the raw vector */
+ nir_def *memory = nir_load_constant_agx(
+ b, interchange_comps, interchange_register_size, base, stride_offset_el,
+ .format = interchange_format, .base = 0u);
+
+ unsigned dest_size = intr->def.bit_size;
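+ /* Assume every channel has the first channel's bit size; the packed
+ * RGB10A2 case overrides these below.
+ */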
+ unsigned bits[] = {desc->channel[chan].size, desc->channel[chan].size,
+ desc->channel[chan].size, desc->channel[chan].size};
+
+ /* Unpack non-native formats */
+ if (is_rg11_b10(desc)) {
+ memory = nir_format_unpack_11f11f10f(b, memory);
+ } else if (is_rgb10_a2(desc)) {
+ bits[0] = 10;
+ bits[1] = 10;
+ bits[2] = 10;
+ bits[3] = 2;
+ if (is_signed)
+ memory = nir_format_unpack_sint(b, memory, bits, 4);
+ else
+ memory = nir_format_unpack_uint(b, memory, bits, 4);
+ }
+
+ if (desc->channel[chan].normalized) {
+ if (is_signed)
+ memory = nir_format_snorm_to_float(b, memory, bits);
+ else
+ memory = nir_format_unorm_to_float(b, memory, bits);
+ } else if (desc->channel[chan].pure_integer) {
+ if (is_signed)
+ memory = nir_i2iN(b, memory, dest_size);
+ else
+ memory = nir_u2uN(b, memory, dest_size);
+ } else {
+ if (is_unsigned)
+ memory = nir_u2fN(b, memory, dest_size);
+ else if (is_signed || is_fixed)
+ memory = nir_i2fN(b, memory, dest_size);
+ else
+ memory = nir_f2fN(b, memory, dest_size);
+
+ /* 16.16 fixed-point weirdo GL formats need to be scaled */
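+ /* e.g. a raw value of 0x00010000 (65536) becomes 1.0 after the scale */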
+ if (is_fixed) {
+ assert(desc->is_array && desc->channel[chan].size == 32);
+ assert(dest_size == 32 && "overflow if smaller");
+ memory = nir_fmul_imm(b, memory, 1.0 / 65536.0);
+ }
+ }
+
+ /* We now have a properly formatted vector of the components in memory. Apply
+ * the format swizzle forwards to trim/pad/reorder as needed.
+ */
+ nir_def *channels[4] = {NULL};
+
+ for (unsigned i = 0; i < intr->num_components; ++i) {
+ unsigned c = nir_intrinsic_component(intr) + i;
+ channels[i] = apply_swizzle_channel(b, memory, desc->swizzle[c], is_int);
+ }
+
+ nir_def *logical = nir_vec(b, channels, intr->num_components);
+ nir_def_rewrite_uses(&intr->def, logical);
+ return true;
+}
+
+bool
+kk_nir_lower_vbo(nir_shader *nir, struct kk_attribute *attribs)
+{
+ assert(nir->info.stage == MESA_SHADER_VERTEX);
+
+ struct ctx ctx = {.attribs = attribs};
+ bool progress =
+ nir_shader_intrinsics_pass(nir, pass, nir_metadata_control_flow, &ctx);
+
+ if (ctx.requires_instance_id)
+ BITSET_SET(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID);
+ if (ctx.requires_base_instance)
+ BITSET_SET(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
+ if (ctx.requires_vertex_id)
+ BITSET_SET(nir->info.system_values_read, SYSTEM_VALUE_VERTEX_ID);
+ return progress;
+}
diff --git a/src/kosmickrisp/vulkan/kk_nir_lower_vbo.h b/src/kosmickrisp/vulkan/kk_nir_lower_vbo.h
new file mode 100644
index 00000000000..436e070794a
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_nir_lower_vbo.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2022 Alyssa Rosenzweig
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "util/format/u_formats.h"
+#include "nir.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define KK_MAX_ATTRIBS (32)
+#define KK_MAX_VBUFS (32)
+
+/* See pipe_vertex_element for justification on the sizes. This structure should
+ * be small so it can be embedded into a shader key.
+ */
+struct kk_attribute {
+ /* If instanced, a divisor of zero means all instances read the same value
+ * (Vulkan semantics). */
+ uint32_t divisor;
+ /* Buffer binding to load stride from root_table */
+ uint32_t binding;
+
+ /* pipe_format, all vertex formats should be <= 255 */
+ uint8_t format;
+
+ unsigned buf : 7;
+ bool instanced : 1;
+};
+
+bool kk_nir_lower_vbo(nir_shader *shader, struct kk_attribute *attribs);
+
+bool kk_vbo_supports_format(enum pipe_format format);
+
+#ifdef __cplusplus
+} /* extern C */
+#endif
\ No newline at end of file
diff --git a/src/kosmickrisp/vulkan/kk_physical_device.c b/src/kosmickrisp/vulkan/kk_physical_device.c
new file mode 100644
index 00000000000..2f9ce537f6e
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_physical_device.c
@@ -0,0 +1,1032 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_physical_device.h"
+
+#include "kk_entrypoints.h"
+#include "kk_image.h"
+#include "kk_instance.h"
+#include "kk_nir_lower_vbo.h"
+#include "kk_sync.h"
+#include "kk_wsi.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+
+#include "util/disk_cache.h"
+#include "util/mesa-sha1.h"
+#include "git_sha1.h"
+
+#include "vulkan/wsi/wsi_common.h"
+#include "vk_device.h"
+#include "vk_drm_syncobj.h"
+#include "vk_shader_module.h"
+
+static uint32_t
+kk_get_vk_version()
+{
+ /* Version override takes priority */
+ const uint32_t version_override = vk_get_version_override();
+ if (version_override)
+ return version_override;
+
+ return VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION);
+}
+
+static void
+kk_get_device_extensions(const struct kk_instance *instance,
+ struct vk_device_extension_table *ext)
+{
+ *ext = (struct vk_device_extension_table){
+ /* Vulkan 1.1 */
+ .KHR_16bit_storage = true,
+ .KHR_bind_memory2 = true,
+ .KHR_dedicated_allocation = true,
+ .KHR_descriptor_update_template = true,
+ .KHR_device_group = true,
+ .KHR_external_fence = true,
+ .KHR_external_memory = true,
+ .KHR_external_semaphore = true,
+ .KHR_get_memory_requirements2 = true,
+ .KHR_maintenance1 = true,
+ .KHR_maintenance2 = true,
+ .KHR_maintenance3 = true,
+ .KHR_multiview = true,
+ .KHR_relaxed_block_layout = true,
+ .KHR_sampler_ycbcr_conversion = true,
+ .KHR_shader_draw_parameters = false,
+ .KHR_storage_buffer_storage_class = true,
+ .KHR_variable_pointers = true,
+
+ /* Vulkan 1.2 */
+ .KHR_8bit_storage = true,
+ .KHR_buffer_device_address = true, /* Required in Vulkan 1.3 */
+ .KHR_create_renderpass2 = true,
+ .KHR_depth_stencil_resolve = true,
+ .KHR_draw_indirect_count = false,
+ .KHR_driver_properties = true,
+ .KHR_image_format_list = true,
+ .KHR_imageless_framebuffer = true,
+ .KHR_sampler_mirror_clamp_to_edge = false,
+ .KHR_separate_depth_stencil_layouts = true,
+ .KHR_shader_atomic_int64 = false,
+ .KHR_shader_float16_int8 =
+ false, /* TODO_KOSMICKRISP shaderInt8 shaderFloat16 */
+ .KHR_shader_float_controls = true,
+ .KHR_shader_subgroup_extended_types = true,
+ .KHR_spirv_1_4 = true,
+ .KHR_timeline_semaphore = true,
+ .KHR_uniform_buffer_standard_layout = true,
+ .KHR_vulkan_memory_model = true, /* Required in Vulkan 1.3 */
+ .EXT_descriptor_indexing = true,
+ .EXT_host_query_reset = true,
+ .EXT_sampler_filter_minmax = false,
+ .EXT_scalar_block_layout = true,
+ .EXT_separate_stencil_usage = true,
+ .EXT_shader_viewport_index_layer = false,
+
+ /* Vulkan 1.3 */
+ .KHR_copy_commands2 = true,
+ .KHR_dynamic_rendering = true,
+ .KHR_format_feature_flags2 = true,
+ .KHR_maintenance4 = true,
+ .KHR_shader_integer_dot_product = true,
+ .KHR_shader_non_semantic_info = true,
+ .KHR_shader_terminate_invocation = true,
+ .KHR_synchronization2 = true,
+ .KHR_zero_initialize_workgroup_memory = true,
+ .EXT_4444_formats = false,
+ .EXT_extended_dynamic_state = true,
+ .EXT_extended_dynamic_state2 = false,
+ .EXT_image_robustness = true,
+ .EXT_inline_uniform_block = true,
+ .EXT_pipeline_creation_cache_control = true,
+ .EXT_pipeline_creation_feedback = true,
+ .EXT_private_data = true,
+ .EXT_shader_demote_to_helper_invocation = true,
+ .EXT_subgroup_size_control = true,
+ .EXT_texel_buffer_alignment = false,
+ .EXT_texture_compression_astc_hdr = false,
+ .EXT_tooling_info = true,
+ .EXT_ycbcr_2plane_444_formats = false,
+
+ /* Vulkan 1.4 */
+ .KHR_push_descriptor = true,
+
+ /* Optional extensions */
+#ifdef KK_USE_WSI_PLATFORM
+ .KHR_swapchain = true,
+ .KHR_swapchain_mutable_format = true,
+#endif
+ .EXT_external_memory_metal = true,
+ .EXT_mutable_descriptor_type = true,
+ .EXT_shader_replicated_composites = true,
+
+ .KHR_shader_expect_assume = true,
+ .KHR_shader_maximal_reconvergence = true,
+ .KHR_shader_relaxed_extended_instruction = true,
+ .KHR_shader_subgroup_uniform_control_flow = true,
+
+ .GOOGLE_decorate_string = true,
+ .GOOGLE_hlsl_functionality1 = true,
+ .GOOGLE_user_type = true,
+ };
+}
+
+static void
+kk_get_device_features(
+ const struct vk_device_extension_table *supported_extensions,
+ struct vk_features *features)
+{
+ *features = (struct vk_features){
+ /* Vulkan 1.0 */
+ .robustBufferAccess = true,
+ .depthClamp = true,
+ .drawIndirectFirstInstance = true,
+ /* TODO_KOSMICKRISP
+ * Enabling fragmentStoresAndAtomics fails the following CTS tests; this
+ * still needs investigation:
+ * dEQP-VK.fragment_operations.early_fragment.discard_no_early_fragment_tests_depth
+ * dEQP-VK.robustness.image_robustness.bind.notemplate.*i.unroll.nonvolatile.sampled_image.no_fmt_qual.img.samples_1.*d_array.frag
+ */
+ .fragmentStoresAndAtomics = false,
+ .imageCubeArray = true,
+ .shaderInt16 = true,
+ .shaderInt64 = true,
+ .shaderResourceMinLod = true,
+ /* TODO_KOSMICKRISP
+ * Disabled because dEQP-VK.api.format_feature_flags2.r8_unorm and similar
+ * tests fail; we need to set
+ * VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT and
+ * VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT for those formats.
+ * Doing so may trigger more tests that have not been run yet. */
+ .shaderStorageImageReadWithoutFormat = false,
+ .shaderStorageImageWriteWithoutFormat = false,
+ .shaderUniformBufferArrayDynamicIndexing = true,
+ .shaderSampledImageArrayDynamicIndexing = true,
+ .shaderStorageBufferArrayDynamicIndexing = true,
+ .shaderStorageImageArrayDynamicIndexing = true,
+
+ /* Vulkan 1.1 */
+ .multiview = true,
+ .storageBuffer16BitAccess = true,
+ .storageInputOutput16 = false,
+ .storagePushConstant16 = true,
+ .variablePointersStorageBuffer = true,
+ .variablePointers = true,
+ .uniformAndStorageBuffer16BitAccess = true,
+
+ /* Vulkan 1.2 */
+ .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
+ .descriptorBindingPartiallyBound = true,
+ .descriptorBindingSampledImageUpdateAfterBind = true,
+ .descriptorBindingStorageBufferUpdateAfterBind = true,
+ .descriptorBindingStorageImageUpdateAfterBind = true,
+ .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
+ .descriptorBindingUniformBufferUpdateAfterBind = true,
+ .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
+ .descriptorBindingUpdateUnusedWhilePending = true,
+ .descriptorBindingVariableDescriptorCount = true,
+ .descriptorIndexing = true,
+ .hostQueryReset = true,
+ .imagelessFramebuffer = true,
+ .runtimeDescriptorArray = true,
+ .scalarBlockLayout = true,
+ .separateDepthStencilLayouts = true,
+ /* TODO_KOSMICKRISP shaderFloat16
+ * Failing:
+ * dEQP-VK.spirv_assembly.instruction.compute.float16.opcompositeinsert.v4f16
+ * dEQP-VK.spirv_assembly.instruction.compute.float16.opcompositeinsert.v2f16arr5
+ * dEQP-VK.spirv_assembly.instruction.compute.float16.opcompositeinsert.v3f16arr5
+ * dEQP-VK.spirv_assembly.instruction.compute.float16.opcompositeinsert.v4f16arr3
+ * dEQP-VK.spirv_assembly.instruction.compute.float16.opcompositeinsert.struct16arr3
+ * dEQP-VK.spirv_assembly.instruction.graphics.float16.opcompositeinsert.v3f16_frag
+ * dEQP-VK.spirv_assembly.instruction.graphics.float16.opcompositeinsert.v4f16_frag
+ * dEQP-VK.spirv_assembly.instruction.graphics.float16.opcompositeinsert.v2f16arr5_frag
+ * dEQP-VK.spirv_assembly.instruction.graphics.float16.opcompositeinsert.v3f16arr5_frag
+ * dEQP-VK.spirv_assembly.instruction.graphics.float16.opcompositeinsert.v4f16arr3_frag
+ * dEQP-VK.spirv_assembly.instruction.graphics.float16.opcompositeinsert.struct16arr3_frag
+ * dEQP-VK.memory_model.shared.16bit.nested_structs_arrays.0
+ * dEQP-VK.memory_model.shared.16bit.nested_structs_arrays.4
+ */
+ .shaderFloat16 = false,
+ .shaderInputAttachmentArrayDynamicIndexing = true,
+ .shaderInputAttachmentArrayNonUniformIndexing = true,
+ /* TODO_KOSMICKRISP shaderInt8
+ * The MSL compiler crashes in multiple tests if we enable shaderInt8; we
+ * need to understand why and find a workaround:
+ * dEQP-VK.memory_model.shared.8bit.vector_types.9
+ * dEQP-VK.memory_model.shared.8bit.basic_types.8
+ * dEQP-VK.memory_model.shared.8bit.basic_arrays.2
+ * dEQP-VK.memory_model.shared.8bit.arrays_of_arrays.1
+ * dEQP-VK.memory_model.shared.8bit.arrays_of_arrays.8
+ * Probably more
+ */
+ .shaderInt8 = false,
+ .shaderOutputViewportIndex = true,
+ .shaderOutputLayer = true,
+ .shaderSampledImageArrayNonUniformIndexing = true,
+ .shaderStorageBufferArrayNonUniformIndexing = true,
+ .shaderStorageTexelBufferArrayDynamicIndexing = true,
+ .shaderSubgroupExtendedTypes = true,
+ .shaderUniformTexelBufferArrayDynamicIndexing = true,
+ .shaderUniformTexelBufferArrayNonUniformIndexing = true,
+ .storageBuffer8BitAccess = true,
+ .storagePushConstant8 = true,
+ .subgroupBroadcastDynamicId = true,
+ .timelineSemaphore = true,
+ .uniformAndStorageBuffer8BitAccess = true,
+ .uniformBufferStandardLayout = true,
+
+ /* Vulkan 1.3 */
+ .bufferDeviceAddress = true,
+ .computeFullSubgroups = true,
+ .dynamicRendering = true,
+ .inlineUniformBlock = true,
+ .maintenance4 = true,
+ .pipelineCreationCacheControl = true,
+ .privateData = true,
+ .robustImageAccess = true,
+ .shaderDemoteToHelperInvocation = true,
+ .shaderIntegerDotProduct = true,
+ .shaderTerminateInvocation = true,
+ .shaderZeroInitializeWorkgroupMemory = true,
+ .subgroupSizeControl = true,
+ .synchronization2 = true,
+ .vulkanMemoryModel = true,
+ .vulkanMemoryModelDeviceScope = true,
+
+ /* Optional features */
+ .samplerAnisotropy = true,
+ .samplerYcbcrConversion = true,
+ .textureCompressionETC2 = true,
+ .textureCompressionASTC_LDR = true,
+
+ /* VK_EXT_mutable_descriptor_type */
+ .mutableDescriptorType = true,
+
+ /* VK_KHR_shader_expect_assume */
+ .shaderExpectAssume = true,
+
+ /* VK_KHR_shader_maximal_reconvergence */
+ .shaderMaximalReconvergence = true,
+
+ /* VK_KHR_shader_relaxed_extended_instruction */
+ .shaderRelaxedExtendedInstruction = true,
+
+ /* VK_EXT_shader_replicated_composites */
+ .shaderReplicatedComposites = true,
+
+ /* VK_KHR_shader_subgroup_uniform_control_flow */
+ .shaderSubgroupUniformControlFlow = true,
+ };
+}
+
+static void
+kk_get_device_properties(const struct kk_physical_device *pdev,
+ const struct kk_instance *instance,
+ struct vk_properties *properties)
+{
+ const VkSampleCountFlagBits sample_counts =
+ VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT |
+ // TODO_KOSMICKRISP Modify sample count based on what pdev supports
+ VK_SAMPLE_COUNT_4_BIT /* |
+ VK_SAMPLE_COUNT_8_BIT */
+ ;
+
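+ /* Every reported sample count bit must stay at or below KK_MAX_SAMPLES */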
+ assert(sample_counts <= (KK_MAX_SAMPLES << 1) - 1);
+
+ uint64_t os_page_size = 4096;
+ os_get_page_size(&os_page_size);
+
+ *properties = (struct vk_properties){
+ .apiVersion = kk_get_vk_version(),
+ .driverVersion = vk_get_driver_version(),
+ .vendorID = instance->force_vk_vendor != 0 ? instance->force_vk_vendor
+ : 0x106b,
+ .deviceID = 100,
+ .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
+
+ /* Vulkan 1.0 limits */
+ /* Values taken from Apple7
+ https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */
+ .maxImageDimension1D = kk_image_max_dimension(VK_IMAGE_TYPE_2D),
+ .maxImageDimension2D = kk_image_max_dimension(VK_IMAGE_TYPE_2D),
+ .maxImageDimension3D = kk_image_max_dimension(VK_IMAGE_TYPE_3D),
+ .maxImageDimensionCube = 16384,
+ .maxImageArrayLayers = 2048,
+ .maxTexelBufferElements = 256 * 1024 * 1024,
+ .maxUniformBufferRange = 65536,
+ .maxStorageBufferRange = UINT32_MAX,
+ .maxPushConstantsSize = KK_MAX_PUSH_SIZE,
+ .maxMemoryAllocationCount = 4096,
+ .maxSamplerAllocationCount = 4000,
+ .bufferImageGranularity = 16,
+ .sparseAddressSpaceSize = KK_SPARSE_ADDR_SPACE_SIZE,
+ .maxBoundDescriptorSets = KK_MAX_SETS,
+ .maxPerStageDescriptorSamplers = KK_MAX_DESCRIPTORS,
+ .maxPerStageDescriptorUniformBuffers = KK_MAX_DESCRIPTORS,
+ .maxPerStageDescriptorStorageBuffers = KK_MAX_DESCRIPTORS,
+ .maxPerStageDescriptorSampledImages = KK_MAX_DESCRIPTORS,
+ .maxPerStageDescriptorStorageImages = KK_MAX_DESCRIPTORS,
+ .maxPerStageDescriptorInputAttachments = KK_MAX_DESCRIPTORS,
+ .maxPerStageResources = UINT32_MAX,
+ .maxDescriptorSetSamplers = KK_MAX_DESCRIPTORS,
+ .maxDescriptorSetUniformBuffers = KK_MAX_DESCRIPTORS,
+ .maxDescriptorSetUniformBuffersDynamic = KK_MAX_DYNAMIC_BUFFERS / 2,
+ .maxDescriptorSetStorageBuffers = KK_MAX_DESCRIPTORS,
+ .maxDescriptorSetStorageBuffersDynamic = KK_MAX_DYNAMIC_BUFFERS / 2,
+ .maxDescriptorSetSampledImages = KK_MAX_DESCRIPTORS,
+ .maxDescriptorSetStorageImages = KK_MAX_DESCRIPTORS,
+ .maxDescriptorSetInputAttachments = KK_MAX_DESCRIPTORS,
+ .maxVertexInputAttributes = KK_MAX_ATTRIBS,
+ .maxVertexInputBindings = KK_MAX_VBUFS,
+ .maxVertexInputAttributeOffset = 2047,
+ .maxVertexInputBindingStride = 2048,
+ .maxVertexOutputComponents = 128,
+ .maxTessellationGenerationLevel = 64,
+ .maxTessellationPatchSize = 32,
+ .maxTessellationControlPerVertexInputComponents = 128,
+ .maxTessellationControlPerVertexOutputComponents = 128,
+ .maxTessellationControlPerPatchOutputComponents = 120,
+ .maxTessellationControlTotalOutputComponents = 4216,
+ .maxTessellationEvaluationInputComponents = 128,
+ .maxTessellationEvaluationOutputComponents = 128,
+ .maxGeometryShaderInvocations = 32,
+ .maxGeometryInputComponents = 128,
+ .maxGeometryOutputComponents = 128,
+ .maxGeometryOutputVertices = 1024,
+ .maxGeometryTotalOutputComponents = 1024,
+ .maxFragmentInputComponents = 128,
+ .maxFragmentOutputAttachments = KK_MAX_RTS,
+ .maxFragmentDualSrcAttachments = 1,
+ .maxFragmentCombinedOutputResources = 16,
+ .maxComputeSharedMemorySize = KK_MAX_SHARED_SIZE,
+ .maxComputeWorkGroupCount = {0x7fffffff, 65535, 65535},
+ .maxComputeWorkGroupInvocations = pdev->info.max_workgroup_invocations,
+ .maxComputeWorkGroupSize = {pdev->info.max_workgroup_count[0],
+ pdev->info.max_workgroup_count[1],
+ pdev->info.max_workgroup_count[2]},
+ .subPixelPrecisionBits = 8,
+ .subTexelPrecisionBits = 8,
+ .mipmapPrecisionBits = 8,
+ .maxDrawIndexedIndexValue = UINT32_MAX,
+ .maxDrawIndirectCount = UINT32_MAX,
+ .maxSamplerLodBias = 15,
+ .maxSamplerAnisotropy = 16,
+ .maxViewports = KK_MAX_VIEWPORTS,
+ .maxViewportDimensions = {32768, 32768},
+ .viewportBoundsRange = {-65536, 65536},
+ .viewportSubPixelBits = 8,
+ .minMemoryMapAlignment = os_page_size,
+ .minTexelBufferOffsetAlignment = KK_MIN_TEXEL_BUFFER_ALIGNMENT,
+ .minUniformBufferOffsetAlignment = KK_MIN_UBO_ALIGNMENT,
+ .minStorageBufferOffsetAlignment = KK_MIN_SSBO_ALIGNMENT,
+ .minTexelOffset = -8,
+ .maxTexelOffset = 7,
+ .minTexelGatherOffset = -32,
+ .maxTexelGatherOffset = 31,
+ .minInterpolationOffset = -0.5,
+ .maxInterpolationOffset = 0.4375,
+ .subPixelInterpolationOffsetBits = 4,
+ .maxFramebufferHeight = 16384,
+ .maxFramebufferWidth = 16384,
+ .maxFramebufferLayers = 2048,
+ .framebufferColorSampleCounts = sample_counts,
+ .framebufferDepthSampleCounts = sample_counts,
+ .framebufferNoAttachmentsSampleCounts = sample_counts,
+ .framebufferStencilSampleCounts = sample_counts,
+ .maxColorAttachments = KK_MAX_RTS,
+ .sampledImageColorSampleCounts = sample_counts,
+ .sampledImageIntegerSampleCounts = sample_counts,
+ .sampledImageDepthSampleCounts = sample_counts,
+ .sampledImageStencilSampleCounts = sample_counts,
+ .storageImageSampleCounts = sample_counts,
+ .maxSampleMaskWords = 1,
+ .timestampComputeAndGraphics = false,
+ .timestampPeriod = 1,
+ .maxClipDistances = 8,
+ .maxCullDistances = 8,
+ .maxCombinedClipAndCullDistances = 8,
+ .discreteQueuePriorities = 2,
+ .pointSizeRange = {1.0f, 1.0f},
+ .lineWidthRange = {1.0f, 1.0f},
+ .pointSizeGranularity = 0.0f,
+ .lineWidthGranularity = 0.0f,
+ .strictLines = false,
+ .standardSampleLocations = true,
+ .optimalBufferCopyOffsetAlignment = 1,
+ .optimalBufferCopyRowPitchAlignment = 1,
+ .nonCoherentAtomSize = 64,
+
+ /* Vulkan 1.0 sparse properties */
+ .sparseResidencyNonResidentStrict = false,
+ .sparseResidencyAlignedMipSize = false,
+ .sparseResidencyStandard2DBlockShape = false,
+ .sparseResidencyStandard2DMultisampleBlockShape = false,
+ .sparseResidencyStandard3DBlockShape = false,
+
+ /* Vulkan 1.1 properties */
+ .subgroupSize = 32,
+ .subgroupSupportedStages =
+ VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
+ .subgroupSupportedOperations =
+ VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
+ VK_SUBGROUP_FEATURE_VOTE_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
+ VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
+ VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
+ VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR, // | TODO_KOSMICKRISP
+ // VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
+ // VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
+ // VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR,
+ .subgroupQuadOperationsInAllStages = true,
+ .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY,
+ .maxMultiviewViewCount = KK_MAX_MULTIVIEW_VIEW_COUNT,
+ .maxMultiviewInstanceIndex = UINT32_MAX,
+ .maxPerSetDescriptors = UINT32_MAX,
+ .maxMemoryAllocationSize = (1u << 31),
+
+ /* Vulkan 1.2 properties */
+ .supportedDepthResolveModes =
+ VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT |
+ VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT,
+ .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
+ VK_RESOLVE_MODE_MIN_BIT |
+ VK_RESOLVE_MODE_MAX_BIT,
+ .independentResolveNone = true,
+ .independentResolve = true,
+ .driverID = VK_DRIVER_ID_MESA_HONEYKRISP, // TODO_KOSMICKRISP Have our own
+ .conformanceVersion = (VkConformanceVersion){1, 4, 3, 2},
+ .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE,
+ .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE,
+ .shaderSignedZeroInfNanPreserveFloat16 = false,
+ .shaderSignedZeroInfNanPreserveFloat32 = false,
+ .shaderSignedZeroInfNanPreserveFloat64 = false,
+ .shaderDenormPreserveFloat16 = false,
+ .shaderDenormPreserveFloat32 = false,
+ .shaderDenormPreserveFloat64 = false,
+ .shaderDenormFlushToZeroFloat16 = false,
+ .shaderDenormFlushToZeroFloat32 = false,
+ .shaderDenormFlushToZeroFloat64 = false,
+ .shaderRoundingModeRTEFloat16 = false,
+ .shaderRoundingModeRTEFloat32 = false,
+ .shaderRoundingModeRTEFloat64 = false,
+ .shaderRoundingModeRTZFloat16 = false,
+ .shaderRoundingModeRTZFloat32 = false,
+ .shaderRoundingModeRTZFloat64 = false,
+ .maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX,
+ .shaderUniformBufferArrayNonUniformIndexingNative = true,
+ .shaderSampledImageArrayNonUniformIndexingNative = true,
+ .shaderStorageBufferArrayNonUniformIndexingNative = true,
+ .shaderStorageImageArrayNonUniformIndexingNative = true,
+ .shaderInputAttachmentArrayNonUniformIndexingNative = true,
+ .robustBufferAccessUpdateAfterBind = true,
+ .quadDivergentImplicitLod = false,
+ .maxPerStageDescriptorUpdateAfterBindSamplers = KK_MAX_DESCRIPTORS,
+ .maxPerStageDescriptorUpdateAfterBindUniformBuffers = KK_MAX_DESCRIPTORS,
+ .maxPerStageDescriptorUpdateAfterBindStorageBuffers = KK_MAX_DESCRIPTORS,
+ .maxPerStageDescriptorUpdateAfterBindSampledImages = KK_MAX_DESCRIPTORS,
+ .maxPerStageDescriptorUpdateAfterBindStorageImages = KK_MAX_DESCRIPTORS,
+ .maxPerStageDescriptorUpdateAfterBindInputAttachments =
+ KK_MAX_DESCRIPTORS,
+ .maxPerStageUpdateAfterBindResources = UINT32_MAX,
+ .maxDescriptorSetUpdateAfterBindSamplers = KK_MAX_DESCRIPTORS,
+ .maxDescriptorSetUpdateAfterBindUniformBuffers = KK_MAX_DESCRIPTORS,
+ .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic =
+ KK_MAX_DYNAMIC_BUFFERS / 2,
+ .maxDescriptorSetUpdateAfterBindStorageBuffers = KK_MAX_DESCRIPTORS,
+ .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic =
+ KK_MAX_DYNAMIC_BUFFERS / 2,
+ .maxDescriptorSetUpdateAfterBindSampledImages = KK_MAX_DESCRIPTORS,
+ .maxDescriptorSetUpdateAfterBindStorageImages = KK_MAX_DESCRIPTORS,
+ .maxDescriptorSetUpdateAfterBindInputAttachments = KK_MAX_DESCRIPTORS,
+ .filterMinmaxSingleComponentFormats = false,
+ .filterMinmaxImageComponentMapping = false,
+ .maxTimelineSemaphoreValueDifference = UINT64_MAX,
+ .framebufferIntegerColorSampleCounts = sample_counts,
+
+ /* Vulkan 1.3 properties */
+ .minSubgroupSize = 32,
+ .maxSubgroupSize = 32,
+ .maxComputeWorkgroupSubgroups = pdev->info.max_workgroup_invocations / 32,
+ .requiredSubgroupSizeStages = 0,
+ .maxInlineUniformBlockSize = 1 << 16,
+ .maxPerStageDescriptorInlineUniformBlocks = 32,
+ .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 32,
+ .maxDescriptorSetInlineUniformBlocks = 6 * 32,
+ .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 6 * 32,
+ .maxInlineUniformTotalSize = 1 << 16,
+ .integerDotProduct4x8BitPackedUnsignedAccelerated = false,
+ .integerDotProduct4x8BitPackedSignedAccelerated = false,
+ .integerDotProduct4x8BitPackedMixedSignednessAccelerated = false,
+ .storageTexelBufferOffsetAlignmentBytes = KK_MIN_TEXEL_BUFFER_ALIGNMENT,
+ .storageTexelBufferOffsetSingleTexelAlignment = false,
+ .uniformTexelBufferOffsetAlignmentBytes = KK_MIN_TEXEL_BUFFER_ALIGNMENT,
+ .uniformTexelBufferOffsetSingleTexelAlignment = false,
+ .maxBufferSize = KK_MAX_BUFFER_SIZE,
+
+ /* VK_KHR_push_descriptor */
+ .maxPushDescriptors = KK_MAX_PUSH_DESCRIPTORS,
+
+ /* VK_EXT_custom_border_color */
+ .maxCustomBorderColorSamplers = 4000,
+
+ /* VK_EXT_extended_dynamic_state3 */
+ .dynamicPrimitiveTopologyUnrestricted = false,
+
+ /* VK_EXT_graphics_pipeline_library */
+ .graphicsPipelineLibraryFastLinking = true,
+ .graphicsPipelineLibraryIndependentInterpolationDecoration = true,
+
+ /* VK_KHR_line_rasterization */
+ .lineSubPixelPrecisionBits = 8,
+
+ /* VK_KHR_maintenance5 */
+ .earlyFragmentMultisampleCoverageAfterSampleCounting = false,
+ .earlyFragmentSampleMaskTestBeforeSampleCounting = true,
+ .depthStencilSwizzleOneSupport = false,
+ .polygonModePointSize = false,
+ .nonStrictSinglePixelWideLinesUseParallelogram = false,
+ .nonStrictWideLinesUseParallelogram = false,
+
+ /* VK_KHR_maintenance6 */
+ .blockTexelViewCompatibleMultipleLayers = false,
+ .maxCombinedImageSamplerDescriptorCount = 3,
+ .fragmentShadingRateClampCombinerInputs = false, /* TODO */
+
+ /* VK_KHR_maintenance7 */
+ .robustFragmentShadingRateAttachmentAccess = false,
+ .separateDepthStencilAttachmentAccess = false,
+ .maxDescriptorSetTotalUniformBuffersDynamic = KK_MAX_DYNAMIC_BUFFERS / 2,
+ .maxDescriptorSetTotalStorageBuffersDynamic = KK_MAX_DYNAMIC_BUFFERS / 2,
+ .maxDescriptorSetTotalBuffersDynamic = KK_MAX_DYNAMIC_BUFFERS,
+ .maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic =
+ KK_MAX_DYNAMIC_BUFFERS / 2,
+ .maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic =
+ KK_MAX_DYNAMIC_BUFFERS / 2,
+ .maxDescriptorSetUpdateAfterBindTotalBuffersDynamic =
+ KK_MAX_DYNAMIC_BUFFERS,
+
+ /* VK_EXT_legacy_vertex_attributes */
+ .nativeUnalignedPerformance = true,
+
+ /* VK_EXT_map_memory_placed */
+ .minPlacedMemoryMapAlignment = os_page_size,
+
+ /* VK_EXT_multi_draw */
+ .maxMultiDrawCount = UINT32_MAX,
+
+ /* VK_EXT_nested_command_buffer */
+ .maxCommandBufferNestingLevel = UINT32_MAX,
+
+ /* VK_EXT_pipeline_robustness */
+ .defaultRobustnessStorageBuffers =
+ VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
+ .defaultRobustnessUniformBuffers =
+ VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
+ .defaultRobustnessVertexInputs =
+ VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
+ .defaultRobustnessImages =
+ VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT,
+
+ /* VK_EXT_physical_device_drm gets populated later */
+
+ /* VK_EXT_provoking_vertex */
+ .provokingVertexModePerPipeline = true,
+ .transformFeedbackPreservesTriangleFanProvokingVertex = true,
+
+ /* VK_EXT_robustness2 */
+ .robustStorageBufferAccessSizeAlignment = KK_SSBO_BOUNDS_CHECK_ALIGNMENT,
+ .robustUniformBufferAccessSizeAlignment = KK_MIN_UBO_ALIGNMENT,
+
+ /* VK_EXT_sample_locations */
+ .sampleLocationSampleCounts = sample_counts,
+ .maxSampleLocationGridSize = (VkExtent2D){1, 1},
+ .sampleLocationCoordinateRange[0] = 0.0f,
+ .sampleLocationCoordinateRange[1] = 0.9375f,
+ .sampleLocationSubPixelBits = 4,
+ .variableSampleLocations = false,
+
+ /* VK_EXT_shader_object */
+ .shaderBinaryVersion = 0,
+
+ /* VK_EXT_transform_feedback */
+ .maxTransformFeedbackStreams = 4,
+ .maxTransformFeedbackBuffers = 4,
+ .maxTransformFeedbackBufferSize = UINT32_MAX,
+ .maxTransformFeedbackStreamDataSize = 2048,
+ .maxTransformFeedbackBufferDataSize = 512,
+ .maxTransformFeedbackBufferDataStride = 2048,
+ .transformFeedbackQueries = true,
+ .transformFeedbackStreamsLinesTriangles = false,
+ .transformFeedbackRasterizationStreamSelect = true,
+ .transformFeedbackDraw = true,
+
+ /* VK_KHR_vertex_attribute_divisor */
+ .maxVertexAttribDivisor = UINT32_MAX,
+ .supportsNonZeroFirstInstance = true,
+
+ /* VK_KHR_fragment_shader_barycentric */
+ .triStripVertexOrderIndependentOfProvokingVertex = false,
+ };
+
+ char gpu_name[256u];
+ mtl_device_get_name(pdev->mtl_dev_handle, gpu_name);
+ snprintf(properties->deviceName, sizeof(properties->deviceName), "%s",
+ gpu_name);
+
+ /* It is unclear whether any layouts need special handling, so for now
+ * report all layouts from core and the supported extensions.
+ */
+ static const VkImageLayout supported_layouts[] = {
+ VK_IMAGE_LAYOUT_GENERAL, /* this one is required by spec */
+ VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+ VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
+ VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ VK_IMAGE_LAYOUT_PREINITIALIZED,
+ VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
+ VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
+ VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
+ VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
+ VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
+ VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
+ VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
+ VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
+ VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT,
+ VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
+ };
+
+ properties->pCopySrcLayouts = (VkImageLayout *)supported_layouts;
+ properties->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
+ properties->pCopyDstLayouts = (VkImageLayout *)supported_layouts;
+ properties->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);
+
+ STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
+ memcpy(properties->optimalTilingLayoutUUID, instance->driver_build_sha,
+ VK_UUID_SIZE);
+
+ properties->identicalMemoryTypeRequirements = false;
+
+ /* VK_EXT_shader_module_identifier */
+ STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
+ sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
+ memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
+ vk_shaderModuleIdentifierAlgorithmUUID,
+ sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
+
+ const struct {
+ uint64_t registry_id;
+ uint64_t pad;
+ } dev_uuid = {
+ .registry_id = mtl_device_get_registry_id(pdev->mtl_dev_handle),
+ };
+ STATIC_ASSERT(sizeof(dev_uuid) == VK_UUID_SIZE);
+ memcpy(properties->deviceUUID, &dev_uuid, VK_UUID_SIZE);
+ STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
+ memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);
+
+ snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "KosmicKrisp");
+ snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
+ "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
+}
+
+static void
+kk_physical_device_init_pipeline_cache(struct kk_physical_device *pdev)
+{
+ struct kk_instance *instance = kk_physical_device_instance(pdev);
+
+ struct mesa_sha1 sha_ctx;
+ _mesa_sha1_init(&sha_ctx);
+
+ _mesa_sha1_update(&sha_ctx, instance->driver_build_sha,
+ sizeof(instance->driver_build_sha));
+
+ unsigned char sha[SHA1_DIGEST_LENGTH];
+ _mesa_sha1_final(&sha_ctx, sha);
+
+ STATIC_ASSERT(SHA1_DIGEST_LENGTH >= VK_UUID_SIZE);
+ memcpy(pdev->vk.properties.pipelineCacheUUID, sha, VK_UUID_SIZE);
+ memcpy(pdev->vk.properties.shaderBinaryUUID, sha, VK_UUID_SIZE);
+}
+
+static void
+kk_physical_device_free_disk_cache(struct kk_physical_device *pdev)
+{
+#ifdef ENABLE_SHADER_CACHE
+ if (pdev->vk.disk_cache) {
+ disk_cache_destroy(pdev->vk.disk_cache);
+ pdev->vk.disk_cache = NULL;
+ }
+#else
+ assert(pdev->vk.disk_cache == NULL);
+#endif
+}
+
+static uint64_t
+kk_get_sysmem_heap_size(void)
+{
+ uint64_t sysmem_size_B = 0;
+ if (!os_get_total_physical_memory(&sysmem_size_B))
+ return 0;
+
+ /* Use 3/4 of total size to avoid swapping */
+ return ROUND_DOWN_TO(sysmem_size_B * 3 / 4, 1 << 20);
+}
+
+static uint64_t
+kk_get_sysmem_heap_available(struct kk_physical_device *pdev)
+{
+ uint64_t sysmem_size_B = 0;
+ if (!os_get_available_system_memory(&sysmem_size_B)) {
+ vk_loge(VK_LOG_OBJS(pdev), "Failed to query available system memory");
+ return 0;
+ }
+
+ /* Use 3/4 of available to avoid swapping */
+ return ROUND_DOWN_TO(sysmem_size_B * 3 / 4, 1 << 20);
+}
+
+static void
+get_metal_limits(struct kk_physical_device *pdev)
+{
+ struct mtl_size workgroup_size =
+ mtl_device_max_threads_per_threadgroup(pdev->mtl_dev_handle);
+ pdev->info.max_workgroup_count[0] = workgroup_size.x;
+ pdev->info.max_workgroup_count[1] = workgroup_size.y;
+ pdev->info.max_workgroup_count[2] = workgroup_size.z;
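+ /* Approximate the total invocation limit with the largest per-dimension
+ * limit reported by Metal.
+ */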
+ pdev->info.max_workgroup_invocations =
+ MAX3(workgroup_size.x, workgroup_size.y, workgroup_size.z);
+}
+
+VkResult
+kk_enumerate_physical_devices(struct vk_instance *_instance)
+{
+ struct kk_instance *instance = (struct kk_instance *)_instance;
+ VkResult result;
+
+ struct kk_physical_device *pdev =
+ vk_zalloc(&instance->vk.alloc, sizeof(*pdev), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+
+ if (pdev == NULL) {
+ return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ pdev->mtl_dev_handle = mtl_device_create();
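+ /* A missing Metal device is not an enumeration failure: return VK_SUCCESS
+ * without adding a physical device to the list.
+ */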
+ if (!pdev->mtl_dev_handle) {
+ result = VK_SUCCESS;
+ goto fail_alloc;
+ }
+ get_metal_limits(pdev);
+
+ struct vk_physical_device_dispatch_table dispatch_table;
+ vk_physical_device_dispatch_table_from_entrypoints(
+ &dispatch_table, &kk_physical_device_entrypoints, true);
+ vk_physical_device_dispatch_table_from_entrypoints(
+ &dispatch_table, &wsi_physical_device_entrypoints, false);
+
+ struct vk_device_extension_table supported_extensions;
+ kk_get_device_extensions(instance, &supported_extensions);
+
+ struct vk_features supported_features;
+ kk_get_device_features(&supported_extensions, &supported_features);
+
+ struct vk_properties properties;
+ kk_get_device_properties(pdev, instance, &properties);
+
+ properties.drmHasRender = false;
+
+ result = vk_physical_device_init(&pdev->vk, &instance->vk,
+ &supported_extensions, &supported_features,
+ &properties, &dispatch_table);
+ if (result != VK_SUCCESS)
+ goto fail_mtl_dev;
+
+ uint64_t sysmem_size_B = kk_get_sysmem_heap_size();
+ if (sysmem_size_B == 0) {
+ result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
+ "Failed to query total system memory");
+ goto fail_disk_cache;
+ }
+
+ uint32_t sysmem_heap_idx = pdev->mem_heap_count++;
+ pdev->mem_heaps[sysmem_heap_idx] = (struct kk_memory_heap){
+ .size = sysmem_size_B,
+ .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+ .available = kk_get_sysmem_heap_available,
+ };
+
+ pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType){
+ .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+ .heapIndex = sysmem_heap_idx,
+ };
+
+ assert(pdev->mem_heap_count <= ARRAY_SIZE(pdev->mem_heaps));
+ assert(pdev->mem_type_count <= ARRAY_SIZE(pdev->mem_types));
+
+ pdev->queue_families[pdev->queue_family_count++] = (struct kk_queue_family){
+ .queue_flags =
+ VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
+ .queue_count = 1,
+ };
+ assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families));
+
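+ /* Binary payload semantics are emulated on top of the base sync type via
+ * vk_sync_binary; the list is NULL-terminated.
+ */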
+ pdev->sync_binary_type = vk_sync_binary_get_type(&kk_sync_type);
+ unsigned st_idx = 0;
+ pdev->sync_types[st_idx++] = &kk_sync_type;
+ pdev->sync_types[st_idx++] = &pdev->sync_binary_type.sync;
+ pdev->sync_types[st_idx++] = NULL;
+ assert(st_idx <= ARRAY_SIZE(pdev->sync_types));
+ pdev->vk.supported_sync_types = pdev->sync_types;
+
+ result = kk_init_wsi(pdev);
+ if (result != VK_SUCCESS)
+ goto fail_disk_cache;
+
+ list_add(&pdev->vk.link, &instance->vk.physical_devices.list);
+
+ return VK_SUCCESS;
+
+fail_disk_cache:
+ vk_physical_device_finish(&pdev->vk);
+fail_mtl_dev:
+ mtl_release(pdev->mtl_dev_handle);
+fail_alloc:
+ vk_free(&instance->vk.alloc, pdev);
+ return result;
+}
+
+void
+kk_physical_device_destroy(struct vk_physical_device *vk_pdev)
+{
+ struct kk_physical_device *pdev =
+ container_of(vk_pdev, struct kk_physical_device, vk);
+
+ kk_finish_wsi(pdev);
+ kk_physical_device_free_disk_cache(pdev);
+ vk_physical_device_finish(&pdev->vk);
+ mtl_release(pdev->mtl_dev_handle);
+ vk_free(&pdev->vk.instance->alloc, pdev);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetPhysicalDeviceMemoryProperties2(
+ VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
+{
+ VK_FROM_HANDLE(kk_physical_device, pdev, physicalDevice);
+
+ pMemoryProperties->memoryProperties.memoryHeapCount = pdev->mem_heap_count;
+ for (int i = 0; i < pdev->mem_heap_count; i++) {
+ pMemoryProperties->memoryProperties.memoryHeaps[i] = (VkMemoryHeap){
+ .size = pdev->mem_heaps[i].size,
+ .flags = pdev->mem_heaps[i].flags,
+ };
+ }
+
+ pMemoryProperties->memoryProperties.memoryTypeCount = pdev->mem_type_count;
+ for (int i = 0; i < pdev->mem_type_count; i++) {
+ pMemoryProperties->memoryProperties.memoryTypes[i] = pdev->mem_types[i];
+ }
+
+ vk_foreach_struct(ext, pMemoryProperties->pNext) {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
+ VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = (void *)ext;
+
+ for (unsigned i = 0; i < pdev->mem_heap_count; i++) {
+ const struct kk_memory_heap *heap = &pdev->mem_heaps[i];
+ uint64_t used = p_atomic_read(&heap->used);
+
+ /* From the Vulkan 1.3.278 spec:
+ *
+ * "heapUsage is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
+ * values in which memory usages are returned, with one element
+ * for each memory heap. A heap’s usage is an estimate of how
+ * much memory the process is currently using in that heap."
+ *
+ * TODO: Include internal allocations?
+ */
+ p->heapUsage[i] = used;
+
+ uint64_t available = heap->size;
+ if (heap->available)
+ available = heap->available(pdev);
+
+ /* From the Vulkan 1.3.278 spec:
+ *
+ * "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
+ * values in which memory budgets are returned, with one
+ * element for each memory heap. A heap’s budget is a rough
+ * estimate of how much memory the process can allocate from
+ * that heap before allocations may fail or cause performance
+ * degradation. The budget includes any currently allocated
+ * device memory."
+ *
+ * and
+ *
+ * "The heapBudget value must be less than or equal to
+ * VkMemoryHeap::size for each heap."
+ *
+ * available (queried above) is the total amount free memory
+ * system-wide and does not include our allocations so we need
+ * to add that in.
+ */
+ uint64_t budget = MIN2(available + used, heap->size);
+
+ /* Set the budget at 90% of available to avoid thrashing */
+ p->heapBudget[i] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20);
+ }
+
+ /* From the Vulkan 1.3.278 spec:
+ *
+ * "The heapBudget and heapUsage values must be zero for array
+ * elements greater than or equal to
+ * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The
+ * heapBudget value must be non-zero for array elements less than
+ * VkPhysicalDeviceMemoryProperties::memoryHeapCount."
+ */
+ for (unsigned i = pdev->mem_heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
+ p->heapBudget[i] = 0u;
+ p->heapUsage[i] = 0u;
+ }
+ break;
+ }
+ default:
+ vk_debug_ignored_stype(ext->sType);
+ break;
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetPhysicalDeviceQueueFamilyProperties2(
+ VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount,
+ VkQueueFamilyProperties2 *pQueueFamilyProperties)
+{
+ VK_FROM_HANDLE(kk_physical_device, pdev, physicalDevice);
+ VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
+ pQueueFamilyPropertyCount);
+
+ for (uint8_t i = 0; i < pdev->queue_family_count; i++) {
+ const struct kk_queue_family *queue_family = &pdev->queue_families[i];
+
+ vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
+ {
+ p->queueFamilyProperties.queueFlags = queue_family->queue_flags;
+ p->queueFamilyProperties.queueCount = queue_family->queue_count;
+ p->queueFamilyProperties.timestampValidBits =
+ 0; /* TODO_KOSMICKRISP Timestamp queries */
+ p->queueFamilyProperties.minImageTransferGranularity =
+ (VkExtent3D){1, 1, 1};
+ }
+ }
+}
+
+static const VkTimeDomainKHR kk_time_domains[] = {
+ VK_TIME_DOMAIN_DEVICE_KHR,
+ VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR,
+#ifdef CLOCK_MONOTONIC_RAW
+ VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR,
+#endif
+};
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_GetPhysicalDeviceCalibrateableTimeDomainsKHR(VkPhysicalDevice physicalDevice,
+ uint32_t *pTimeDomainCount,
+ VkTimeDomainKHR *pTimeDomains)
+{
+ VK_OUTARRAY_MAKE_TYPED(VkTimeDomainKHR, out, pTimeDomains, pTimeDomainCount);
+
+ for (int d = 0; d < ARRAY_SIZE(kk_time_domains); d++) {
+ vk_outarray_append_typed(VkTimeDomainKHR, &out, i)
+ {
+ *i = kk_time_domains[d];
+ }
+ }
+
+ return vk_outarray_status(&out);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetPhysicalDeviceMultisamplePropertiesEXT(
+ VkPhysicalDevice physicalDevice, VkSampleCountFlagBits samples,
+ VkMultisamplePropertiesEXT *pMultisampleProperties)
+{
+ VK_FROM_HANDLE(kk_physical_device, pdev, physicalDevice);
+
+ if (samples & pdev->vk.properties.sampleLocationSampleCounts) {
+ pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){1, 1};
+ } else {
+ pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
+ }
+}
diff --git a/src/kosmickrisp/vulkan/kk_physical_device.h b/src/kosmickrisp/vulkan/kk_physical_device.h
new file mode 100644
index 00000000000..9daf6dfbdcc
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_physical_device.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_PHYSICAL_DEVICE_H
+#define KK_PHYSICAL_DEVICE_H 1
+
+#include "kk_private.h"
+
+#include "kosmickrisp/bridge/mtl_types.h"
+
+#include "vk_physical_device.h"
+#include "vk_sync.h"
+#include "vk_sync_binary.h"
+
+#include "wsi_common.h"
+
+#include <sys/types.h>
+
+struct kk_instance;
+struct kk_physical_device;
+
+struct kk_queue_family {
+ VkQueueFlags queue_flags;
+ uint32_t queue_count;
+};
+
+struct kk_memory_heap {
+ uint64_t size;
+ uint64_t used;
+ VkMemoryHeapFlags flags;
+ uint64_t (*available)(struct kk_physical_device *pdev);
+};
+
+struct kk_device_info {
+ uint32_t max_workgroup_count[3];
+ uint32_t max_workgroup_invocations;
+};
+
+struct kk_physical_device {
+ struct vk_physical_device vk;
+ mtl_device *mtl_dev_handle;
+ struct kk_device_info info;
+
+ struct wsi_device wsi_device;
+
+ uint8_t device_uuid[VK_UUID_SIZE];
+
+ // TODO: add mapable VRAM heap if possible
+ struct kk_memory_heap mem_heaps[3];
+ VkMemoryType mem_types[3];
+ uint8_t mem_heap_count;
+ uint8_t mem_type_count;
+
+ // Emulated binary sync type
+ struct vk_sync_binary_type sync_binary_type;
+ const struct vk_sync_type *sync_types[3];
+
+ struct kk_queue_family queue_families[3];
+ uint8_t queue_family_count;
+};
+
+static inline uint32_t
+kk_min_cbuf_alignment()
+{
+ /* Size of vec4 */
+ return 16;
+}
+
+VK_DEFINE_HANDLE_CASTS(kk_physical_device, vk.base, VkPhysicalDevice,
+ VK_OBJECT_TYPE_PHYSICAL_DEVICE)
+
+static inline struct kk_instance *
+kk_physical_device_instance(struct kk_physical_device *pdev)
+{
+ return (struct kk_instance *)pdev->vk.instance;
+}
+
+VkResult kk_enumerate_physical_devices(struct vk_instance *_instance);
+void kk_physical_device_destroy(struct vk_physical_device *vk_device);
+
+#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
+ defined(VK_USE_PLATFORM_XCB_KHR) || defined(VK_USE_PLATFORM_XLIB_KHR) || \
+ defined(VK_USE_PLATFORM_DISPLAY_KHR) || defined(VK_USE_PLATFORM_METAL_EXT)
+#define KK_USE_WSI_PLATFORM
+#endif
+
+#endif // KK_PHYSICAL_DEVICE_H
diff --git a/src/kosmickrisp/vulkan/kk_private.h b/src/kosmickrisp/vulkan/kk_private.h
new file mode 100644
index 00000000000..d50481f6f8a
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_private.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright © 2024 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_PRIVATE_H
+#define KK_PRIVATE_H 1
+
+#include "kosmickrisp/bridge/mtl_types.h"
+
+#include "vk_log.h"
+#include "vk_util.h"
+
+#include <assert.h>
+
+#define KK_MAX_SETS 32
+#define KK_MAX_PUSH_SIZE 128
+#define KK_MAX_DYNAMIC_BUFFERS 64
+#define KK_MAX_RTS 8
+#define KK_MAX_SAMPLES 8
+#define KK_MIN_SSBO_ALIGNMENT 16
+#define KK_MIN_TEXEL_BUFFER_ALIGNMENT 16
+#define KK_MIN_UBO_ALIGNMENT 64
+#define KK_MAX_VIEWPORTS 16
+#define KK_MAX_DESCRIPTOR_SIZE 64
+#define KK_MAX_PUSH_DESCRIPTORS 32
+#define KK_MAX_DESCRIPTOR_SET_SIZE (1u << 30)
+#define KK_MAX_DESCRIPTORS (1 << 20)
+#define KK_PUSH_DESCRIPTOR_SET_SIZE \
+ (KK_MAX_PUSH_DESCRIPTORS * KK_MAX_DESCRIPTOR_SIZE)
+#define KK_SSBO_BOUNDS_CHECK_ALIGNMENT 4
+#define KK_MAX_MULTIVIEW_VIEW_COUNT 32
+#define KK_TEXTURE_BUFFER_WIDTH (1u << 14)
+#define KK_MAX_OCCLUSION_QUERIES (32768)
+
+#define KK_SPARSE_ADDR_SPACE_SIZE (1ull << 39)
+#define KK_MAX_BUFFER_SIZE (1ull << 31)
+#define KK_MAX_SHARED_SIZE (32 * 1024)
+
+/* Max size of a bound cbuf */
+#define KK_MAX_CBUF_SIZE (1u << 16)
+
+/* Metal related macros */
+#define KK_MTL_RESOURCE_OPTIONS \
+ (MTL_RESOURCE_STORAGE_MODE_SHARED | \
+ MTL_RESOURCE_CPU_CACHE_MODE_DEFAULT_CACHE | \
+ MTL_RESOURCE_TRACKING_MODE_UNTRACKED)
+
+#define KK_MAX_CMD_BUFFERS 256
+
+struct kk_addr_range {
+ uint64_t addr;
+ uint64_t range;
+};
+
+typedef enum kk_env_option_t {
+ KK_ENABLE_GPU_CAPTURE = 0,
+ KK_MAX_ENV_OPTIONS,
+} kk_env_option_t;
+
+struct kk_env_option {
+ const char *name;
+ bool value;
+};
+
+static struct kk_env_option KK_ENV_OPTIONS[KK_MAX_ENV_OPTIONS] = {
+ [KK_ENABLE_GPU_CAPTURE] =
+ {
+ .name = "MESA_KOSMICKRISP_ENABLE_GPU_CAPTURE",
+ .value = false,
+ },
+};
+
+static inline bool
+kk_get_environment_boolean(kk_env_option_t option)
+{
+ assert(option >= 0 && option < KK_MAX_ENV_OPTIONS);
+ struct kk_env_option *opt = &KK_ENV_OPTIONS[option];
+ const char *env_str = getenv(opt->name);
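+ /* Any value not starting with '0' enables the option,
+ * e.g. MESA_KOSMICKRISP_ENABLE_GPU_CAPTURE=1.
+ */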
+ if (env_str) {
+ if (strncmp(env_str, "0", 1) != 0) {
+ opt->value = true;
+ } else {
+ opt->value = false;
+ }
+ }
+ return opt->value;
+}
+
+#define kk_debug_ignored_stype(sType) \
+ mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType))
+
+#endif
diff --git a/src/kosmickrisp/vulkan/kk_query_pool.c b/src/kosmickrisp/vulkan/kk_query_pool.c
new file mode 100644
index 00000000000..c6653ba9d13
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_query_pool.c
@@ -0,0 +1,431 @@
+/*
+ * Copyright 2024 Valve Corporation
+ * Copyright 2024 Alyssa Rosenzweig
+ * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_query_pool.h"
+
+#include "kk_bo.h"
+#include "kk_buffer.h"
+#include "kk_cmd_buffer.h"
+#include "kk_device.h"
+#include "kk_encoder.h"
+#include "kk_entrypoints.h"
+#include "kk_physical_device.h"
+#include "kk_query_table.h"
+#include "kkcl.h"
+
+struct kk_query_report {
+ uint64_t value;
+};
+
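+/* Timestamp queries have no separate availability word; availability is
+ * inferred from the report value (UINT64_MAX means unavailable).
+ */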
+static inline bool
+kk_has_available(const struct kk_query_pool *pool)
+{
+ return pool->vk.query_type != VK_QUERY_TYPE_TIMESTAMP;
+}
+
+uint16_t *
+kk_pool_oq_index_ptr(const struct kk_query_pool *pool)
+{
+ return (uint16_t *)((uint8_t *)pool->bo->cpu + pool->query_start);
+}
+
+static uint32_t
+kk_reports_per_query(struct kk_query_pool *pool)
+{
+ switch (pool->vk.query_type) {
+ case VK_QUERY_TYPE_OCCLUSION:
+ case VK_QUERY_TYPE_TIMESTAMP:
+ case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
+ return 1;
+ case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+ return util_bitcount(pool->vk.pipeline_statistics);
+ case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+ // Primitives written and primitives needed
+ return 2;
+ default:
+ UNREACHABLE("Unsupported query type");
+ }
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_CreateQueryPool(VkDevice device, const VkQueryPoolCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkQueryPool *pQueryPool)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ struct kk_query_pool *pool;
+ VkResult result = VK_SUCCESS;
+
+ pool =
+ vk_query_pool_create(&dev->vk, pCreateInfo, pAllocator, sizeof(*pool));
+ if (!pool)
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ bool occlusion = pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION;
+ unsigned occlusion_queries = occlusion ? pCreateInfo->queryCount : 0;
+
+ /* We place the availability first and then data */
+ pool->query_start = 0;
+ if (kk_has_available(pool)) {
+ pool->query_start = align(pool->vk.query_count * sizeof(uint64_t),
+ sizeof(struct kk_query_report));
+ }
+
+ uint32_t reports_per_query = kk_reports_per_query(pool);
+ pool->query_stride = reports_per_query * sizeof(struct kk_query_report);
+
+ if (pool->vk.query_count > 0) {
+ uint32_t bo_size = pool->query_start;
+
+ /* For occlusion queries, we stick the query index remapping here */
+ if (occlusion_queries)
+ bo_size += sizeof(uint16_t) * pool->vk.query_count;
+ else
+ bo_size += pool->query_stride * pool->vk.query_count;
+
+ result = kk_alloc_bo(dev, &dev->vk.base, bo_size, 8, &pool->bo);
+ if (result != VK_SUCCESS) {
+ kk_DestroyQueryPool(device, kk_query_pool_to_handle(pool), pAllocator);
+ return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+
+ /* TODO_KOSMICKRISP Timestamps */
+ }
+
+ uint16_t *oq_index = kk_pool_oq_index_ptr(pool);
+
+ for (unsigned i = 0; i < occlusion_queries; ++i) {
+ uint64_t zero = 0;
+ unsigned index;
+
+ VkResult result =
+ kk_query_table_add(dev, &dev->occlusion_queries, zero, &index);
+
+ if (result != VK_SUCCESS) {
+ kk_DestroyQueryPool(device, kk_query_pool_to_handle(pool), pAllocator);
+ return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+
+ /* We increment as we go so we can clean up properly if we run out */
+ assert(pool->oq_queries < occlusion_queries);
+ oq_index[pool->oq_queries++] = index;
+ }
+
+ *pQueryPool = kk_query_pool_to_handle(pool);
+
+ return result;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_DestroyQueryPool(VkDevice device, VkQueryPool queryPool,
+ const VkAllocationCallbacks *pAllocator)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VK_FROM_HANDLE(kk_query_pool, pool, queryPool);
+
+ if (!pool)
+ return;
+
+ uint16_t *oq_index = kk_pool_oq_index_ptr(pool);
+
+ for (unsigned i = 0; i < pool->oq_queries; ++i) {
+ kk_query_table_remove(dev, &dev->occlusion_queries, oq_index[i]);
+ }
+
+ kk_destroy_bo(dev, pool->bo);
+
+ vk_query_pool_destroy(&dev->vk, pAllocator, &pool->vk);
+}
+
+static uint64_t *
+kk_query_available_map(struct kk_query_pool *pool, uint32_t query)
+{
+ assert(kk_has_available(pool));
+ assert(query < pool->vk.query_count);
+ return (uint64_t *)pool->bo->cpu + query;
+}
+
+static uint64_t
+kk_query_offset(struct kk_query_pool *pool, uint32_t query)
+{
+ assert(query < pool->vk.query_count);
+ return pool->query_start + query * pool->query_stride;
+}
+
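+/* Occlusion results live in the device-wide occlusion query table, reached
+ * through the pool's index remap; other query types store their reports
+ * directly in the pool BO.
+ */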
+static uint64_t
+kk_query_report_addr(struct kk_device *dev, struct kk_query_pool *pool,
+ uint32_t query)
+{
+ if (pool->oq_queries) {
+ uint16_t *oq_index = kk_pool_oq_index_ptr(pool);
+ return dev->occlusion_queries.bo->gpu +
+ (oq_index[query] * sizeof(uint64_t));
+ } else {
+ return pool->bo->gpu + kk_query_offset(pool, query);
+ }
+}
+
+static uint64_t
+kk_query_available_addr(struct kk_query_pool *pool, uint32_t query)
+{
+ assert(kk_has_available(pool));
+ assert(query < pool->vk.query_count);
+ return pool->bo->gpu + query * sizeof(uint64_t);
+}
+
+static struct kk_query_report *
+kk_query_report_map(struct kk_device *dev, struct kk_query_pool *pool,
+ uint32_t query)
+{
+ if (pool->oq_queries) {
+ uint64_t *queries = (uint64_t *)(dev->occlusion_queries.bo->cpu);
+ uint16_t *oq_index = kk_pool_oq_index_ptr(pool);
+
+ return (struct kk_query_report *)&queries[oq_index[query]];
+ } else {
+ return (void *)((char *)pool->bo->cpu + kk_query_offset(pool, query));
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_ResetQueryPool(VkDevice device, VkQueryPool queryPool, uint32_t firstQuery,
+ uint32_t queryCount)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VK_FROM_HANDLE(kk_query_pool, pool, queryPool);
+ for (uint32_t i = 0; i < queryCount; i++) {
+ struct kk_query_report *reports =
+ kk_query_report_map(dev, pool, firstQuery + i);
+
+ uint64_t value = 0;
+ if (kk_has_available(pool)) {
+ uint64_t *available = kk_query_available_map(pool, firstQuery + i);
+ *available = 0u;
+ } else {
+ value = UINT64_MAX;
+ }
+
+ for (unsigned j = 0; j < kk_reports_per_query(pool); ++j) {
+ reports[j].value = value;
+ }
+ }
+}
+
+/**
+ * Goes through a series of consecutive query indices in the given pool,
+ * resetting their report values and marking them available or unavailable
+ * according to set_available.
+ */
+static void
+emit_zero_queries(struct kk_cmd_buffer *cmd, struct kk_query_pool *pool,
+ uint32_t first_index, uint32_t num_queries,
+ bool set_available)
+{
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+ mtl_buffer *buffer = pool->bo->map;
+
+ for (uint32_t i = 0; i < num_queries; i++) {
+ uint64_t report = kk_query_report_addr(dev, pool, first_index + i);
+
+ uint64_t value = 0;
+ if (kk_has_available(pool)) {
+ uint64_t available = kk_query_available_addr(pool, first_index + i);
+ kk_cmd_write(cmd, buffer, available, set_available);
+ } else {
+ value = set_available ? 0u : UINT64_MAX;
+ }
+
+ /* XXX: is this supposed to happen on the begin? */
+ for (unsigned j = 0; j < kk_reports_per_query(pool); ++j) {
+ kk_cmd_write(cmd, buffer,
+ report + (j * sizeof(struct kk_query_report)), value);
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
+ uint32_t firstQuery, uint32_t queryCount)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_query_pool, pool, queryPool);
+ emit_zero_queries(cmd, pool, firstQuery, queryCount, false);
+ /* If we are not mid encoder, just upload the writes */
+ if (cmd->encoder->main.last_used == KK_ENC_NONE)
+ upload_queue_writes(cmd);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
+ VkPipelineStageFlags2 stage, VkQueryPool queryPool,
+ uint32_t query)
+{
+ /* TODO_KOSMICKRISP */
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
+ uint32_t query, VkQueryControlFlags flags)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_query_pool, pool, queryPool);
+ cmd->state.gfx.occlusion.mode = flags & VK_QUERY_CONTROL_PRECISE_BIT
+ ? MTL_VISIBILITY_RESULT_MODE_COUNTING
+ : MTL_VISIBILITY_RESULT_MODE_BOOLEAN;
+ cmd->state.gfx.dirty |= KK_DIRTY_OCCLUSION;
+ uint16_t *oq_index = kk_pool_oq_index_ptr(pool);
+ cmd->state.gfx.occlusion.index = oq_index[query];
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
+ uint32_t query)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_query_pool, pool, queryPool);
+ cmd->state.gfx.occlusion.mode = MTL_VISIBILITY_RESULT_MODE_DISABLED;
+ cmd->state.gfx.dirty |= KK_DIRTY_OCCLUSION;
+
+ /* Make the query available */
+ uint64_t addr = kk_query_available_addr(pool, query);
+ kk_cmd_write(cmd, pool->bo->map, addr, true);
+}
+
+static bool
+kk_query_is_available(struct kk_device *dev, struct kk_query_pool *pool,
+ uint32_t query)
+{
+ if (kk_has_available(pool)) {
+ uint64_t *available = kk_query_available_map(pool, query);
+ return p_atomic_read(available) != 0;
+ } else {
+ const struct kk_query_report *report =
+ kk_query_report_map(dev, pool, query);
+
+ return report->value != UINT64_MAX;
+ }
+}
+
+#define KK_QUERY_TIMEOUT 2000000000ull
+
+static VkResult
+kk_query_wait_for_available(struct kk_device *dev, struct kk_query_pool *pool,
+ uint32_t query)
+{
+ uint64_t abs_timeout_ns = os_time_get_absolute_timeout(KK_QUERY_TIMEOUT);
+
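+ /* Busy-poll for availability, bailing out early if the device is lost. */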
+ while (os_time_get_nano() < abs_timeout_ns) {
+ if (kk_query_is_available(dev, pool, query))
+ return VK_SUCCESS;
+
+ VkResult status = vk_device_check_status(&dev->vk);
+ if (status != VK_SUCCESS)
+ return status;
+ }
+
+ return vk_device_set_lost(&dev->vk, "query timeout");
+}
+
+static void
+cpu_write_query_result(void *dst, uint32_t idx, VkQueryResultFlags flags,
+ uint64_t result)
+{
+ if (flags & VK_QUERY_RESULT_64_BIT) {
+ uint64_t *dst64 = dst;
+ dst64[idx] = result;
+ } else {
+ uint32_t *dst32 = dst;
+ dst32[idx] = result;
+ }
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_GetQueryPoolResults(VkDevice device, VkQueryPool queryPool,
+ uint32_t firstQuery, uint32_t queryCount,
+ size_t dataSize, void *pData, VkDeviceSize stride,
+ VkQueryResultFlags flags)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VK_FROM_HANDLE(kk_query_pool, pool, queryPool);
+
+ if (vk_device_is_lost(&dev->vk))
+ return VK_ERROR_DEVICE_LOST;
+
+ VkResult status = VK_SUCCESS;
+ for (uint32_t i = 0; i < queryCount; i++) {
+ const uint32_t query = firstQuery + i;
+
+ bool available = kk_query_is_available(dev, pool, query);
+
+ if (!available && (flags & VK_QUERY_RESULT_WAIT_BIT)) {
+ status = kk_query_wait_for_available(dev, pool, query);
+ if (status != VK_SUCCESS)
+ return status;
+
+ available = true;
+ }
+
+ bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT);
+
+ const struct kk_query_report *src = kk_query_report_map(dev, pool, query);
+ assert(i * stride < dataSize);
+ void *dst = (char *)pData + i * stride;
+
+ uint32_t reports = kk_reports_per_query(pool);
+ if (write_results) {
+ for (uint32_t j = 0; j < reports; j++) {
+ cpu_write_query_result(dst, j, flags, src[j].value);
+ }
+ }
+
+ if (!write_results)
+ status = VK_NOT_READY;
+
+ if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
+ cpu_write_query_result(dst, reports, flags, available);
+ }
+
+ return status;
+}
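+
+/* Illustrative application-side sketch, added editorially and not part of
+ * this patch, of how the packing above is consumed: with
+ * VK_QUERY_RESULT_64_BIT every report is written as a uint64_t, and
+ * VK_QUERY_RESULT_WITH_AVAILABILITY_BIT appends one extra element per query
+ * after the reports, so a single-report occlusion query packs into two
+ * uint64_t at the chosen stride. The helper name is hypothetical.
+ */
+static inline uint64_t
+example_read_occlusion_result(VkDevice device, VkQueryPool queryPool,
+                              uint32_t query)
+{
+   uint64_t packed[2] = {0, 0}; /* [0] = report value, [1] = availability */
+   VkResult res = vkGetQueryPoolResults(
+      device, queryPool, query, 1, sizeof(packed), packed, sizeof(packed),
+      VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT);
+   return (res == VK_SUCCESS && packed[1] != 0) ? packed[0] : 0;
+}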
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
+ uint32_t firstQuery, uint32_t queryCount,
+ VkBuffer dstBuffer, VkDeviceSize dstOffset,
+ VkDeviceSize stride, VkQueryResultFlags flags)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_query_pool, pool, queryPool);
+ VK_FROM_HANDLE(kk_buffer, dst_buf, dstBuffer);
+ struct kk_device *dev = kk_cmd_buffer_device(cmd);
+
+ struct kk_copy_query_pool_results_info info = {
+ .availability = kk_has_available(pool) ? pool->bo->gpu : 0,
+ .results = pool->oq_queries ? dev->occlusion_queries.bo->gpu
+ : pool->bo->gpu + pool->query_start,
+ .indices = pool->oq_queries ? pool->bo->gpu + pool->query_start : 0,
+ .dst_addr = dst_buf->vk.device_address + dstOffset,
+ .dst_stride = stride,
+ .first_query = firstQuery,
+ .flags = flags,
+ .reports_per_query = kk_reports_per_query(pool),
+ .query_count = queryCount,
+ };
+
+ util_dynarray_append(&cmd->encoder->copy_query_pool_result_infos,
+ struct kk_copy_query_pool_results_info, info);
+ util_dynarray_append(&cmd->encoder->resident_buffers, mtl_buffer *,
+ dst_buf->mtl_handle);
+ util_dynarray_append(&cmd->encoder->resident_buffers, mtl_buffer *,
+ pool->bo->map);
+ util_dynarray_append(&cmd->encoder->resident_buffers, mtl_buffer *,
+ dev->occlusion_queries.bo->map);
+ /* If we are not mid encoder, just upload the writes */
+ if (cmd->encoder->main.last_used == KK_ENC_NONE)
+ upload_queue_writes(cmd);
+}
diff --git a/src/kosmickrisp/vulkan/kk_query_pool.h b/src/kosmickrisp/vulkan/kk_query_pool.h
new file mode 100644
index 00000000000..509a564dcfa
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_query_pool.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_QUERY_POOL_H
+#define KK_QUERY_POOL_H 1
+
+#include "kk_private.h"
+
+#include "vulkan/runtime/vk_query_pool.h"
+
+struct kk_query_pool {
+ struct vk_query_pool vk;
+
+ struct kk_bo *bo;
+
+ uint32_t query_start;
+ uint32_t query_stride;
+
+ unsigned oq_queries;
+};
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(kk_query_pool, vk.base, VkQueryPool,
+ VK_OBJECT_TYPE_QUERY_POOL)
+
+uint16_t *kk_pool_oq_index_ptr(const struct kk_query_pool *pool);
+
+#endif /* KK_QUERY_POOL_H */
diff --git a/src/kosmickrisp/vulkan/kk_query_table.c b/src/kosmickrisp/vulkan/kk_query_table.c
new file mode 100644
index 00000000000..1ab205de0c2
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_query_table.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_query_table.h"
+
+#include "kk_device.h"
+#include "kk_physical_device.h"
+
+static uint32_t query_size = sizeof(uint64_t);
+
+static VkResult
+kk_query_table_grow_locked(struct kk_device *dev, struct kk_query_table *table,
+ uint32_t new_alloc)
+{
+ struct kk_bo *bo;
+ BITSET_WORD *new_in_use;
+ uint32_t *new_free_table;
+ VkResult result;
+
+ assert(new_alloc <= table->max_alloc);
+
+ const uint32_t new_mem_size = new_alloc * query_size;
+ result = kk_alloc_bo(dev, &dev->vk.base, new_mem_size, 256, &bo);
+ if (result != VK_SUCCESS)
+ return result;
+
+ /* We don't allow resize */
+ assert(table->bo == NULL);
+ table->bo = bo;
+
+ assert((new_alloc % BITSET_WORDBITS) == 0);
+ const size_t new_in_use_size = BITSET_WORDS(new_alloc) * sizeof(BITSET_WORD);
+ new_in_use =
+ vk_realloc(&dev->vk.alloc, table->in_use, new_in_use_size,
+ sizeof(BITSET_WORD), VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (new_in_use == NULL) {
+ return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY,
+ "Failed to allocate query in-use set");
+ }
+ memset((char *)new_in_use, 0, new_in_use_size);
+ table->in_use = new_in_use;
+
+ const size_t new_free_table_size = new_alloc * sizeof(uint32_t);
+ new_free_table =
+ vk_realloc(&dev->vk.alloc, table->free_table, new_free_table_size, 4,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (new_free_table == NULL) {
+ return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY,
+ "Failed to allocate query free table");
+ }
+ table->free_table = new_free_table;
+
+ return VK_SUCCESS;
+}
+
+VkResult
+kk_query_table_init(struct kk_device *dev, struct kk_query_table *table,
+ uint32_t query_count)
+{
+ memset(table, 0, sizeof(*table));
+ VkResult result;
+
+ simple_mtx_init(&table->mutex, mtx_plain);
+
+ assert(util_is_power_of_two_nonzero(query_count));
+
+ table->max_alloc = query_count;
+ table->next_query = 0;
+ table->free_count = 0;
+
+ result = kk_query_table_grow_locked(dev, table, query_count);
+ if (result != VK_SUCCESS) {
+ kk_query_table_finish(dev, table);
+ return result;
+ }
+
+ return VK_SUCCESS;
+}
+
+void
+kk_query_table_finish(struct kk_device *dev, struct kk_query_table *table)
+{
+ if (table->bo != NULL)
+ kk_destroy_bo(dev, table->bo);
+ vk_free(&dev->vk.alloc, table->in_use);
+ vk_free(&dev->vk.alloc, table->free_table);
+ simple_mtx_destroy(&table->mutex);
+}
+
+static VkResult
+kk_query_table_alloc_locked(struct kk_device *dev, struct kk_query_table *table,
+ uint32_t *index_out)
+{
+ while (1) {
+ uint32_t index;
+ if (table->free_count > 0) {
+ index = table->free_table[--table->free_count];
+ } else if (table->next_query < table->max_alloc) {
+ index = table->next_query++;
+ } else {
+ return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY,
+ "Query table not large enough");
+ }
+
+ if (!BITSET_TEST(table->in_use, index)) {
+ BITSET_SET(table->in_use, index);
+ *index_out = index;
+ return VK_SUCCESS;
+ }
+ }
+}
+
+static VkResult
+kk_query_table_take_locked(struct kk_device *dev, struct kk_query_table *table,
+ uint32_t index)
+{
+ if (index >= table->max_alloc) {
+ return vk_errorf(dev, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS,
+ "Query %u does not exist", index);
+ }
+
+ if (BITSET_TEST(table->in_use, index)) {
+ return vk_errorf(dev, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS,
+ "Query %u is already in use", index);
+ } else {
+ BITSET_SET(table->in_use, index);
+ return VK_SUCCESS;
+ }
+}
+
+static VkResult
+kk_query_table_add_locked(struct kk_device *dev, struct kk_query_table *table,
+ uint64_t value, uint32_t *index_out)
+{
+ VkResult result = kk_query_table_alloc_locked(dev, table, index_out);
+ if (result != VK_SUCCESS)
+ return result;
+
+ uint64_t *map = (uint64_t *)table->bo->cpu + *index_out;
+ *map = value;
+
+ return VK_SUCCESS;
+}
+
+VkResult
+kk_query_table_add(struct kk_device *dev, struct kk_query_table *table,
+ uint64_t value, uint32_t *index_out)
+{
+ simple_mtx_lock(&table->mutex);
+ VkResult result = kk_query_table_add_locked(dev, table, value, index_out);
+ simple_mtx_unlock(&table->mutex);
+
+ return result;
+}
+
+static VkResult
+kk_query_table_insert_locked(struct kk_device *dev,
+ struct kk_query_table *table, uint32_t index,
+ uint64_t value)
+{
+ VkResult result = kk_query_table_take_locked(dev, table, index);
+ if (result != VK_SUCCESS)
+ return result;
+
+ uint64_t *map = (uint64_t *)table->bo->cpu + index;
+ *map = value;
+
+ return result;
+}
+
+VkResult
+kk_query_table_insert(struct kk_device *dev, struct kk_query_table *table,
+ uint32_t index, uint64_t value)
+{
+ simple_mtx_lock(&table->mutex);
+ VkResult result = kk_query_table_insert_locked(dev, table, index, value);
+ simple_mtx_unlock(&table->mutex);
+
+ return result;
+}
+
+static int
+compar_u32(const void *_a, const void *_b)
+{
+ const uint32_t *a = _a, *b = _b;
+ return *a - *b;
+}
+
+static void
+kk_query_table_compact_free_table(struct kk_query_table *table)
+{
+ if (table->free_count <= 1)
+ return;
+
+ qsort(table->free_table, table->free_count, sizeof(*table->free_table),
+ compar_u32);
+
+ uint32_t j = 1;
+ for (uint32_t i = 1; i < table->free_count; i++) {
+ if (table->free_table[i] == table->free_table[j - 1])
+ continue;
+
+ assert(table->free_table[i] > table->free_table[j - 1]);
+ table->free_table[j++] = table->free_table[i];
+ }
+
+ table->free_count = j;
+}
+
+void
+kk_query_table_remove(struct kk_device *dev, struct kk_query_table *table,
+ uint32_t index)
+{
+ simple_mtx_lock(&table->mutex);
+
+ uint64_t *map = (uint64_t *)table->bo->cpu + index;
+ *map = 0u;
+
+ assert(BITSET_TEST(table->in_use, index));
+
+ /* There may be duplicate entries in the free table. For most operations,
+ * this is fine as we always consult kk_query_table::in_use when
+ * allocating. However, it does mean that there's nothing preventing our
+ * free table from growing larger than the memory we allocated for it. In
+ * the unlikely event that we end up with more entries than we can fit in
+ * the allocated space, compact the table to ensure that the new entry
+ * we're about to add fits.
+ */
+ if (table->free_count >= table->max_alloc)
+ kk_query_table_compact_free_table(table);
+ assert(table->free_count < table->max_alloc);
+
+ BITSET_CLEAR(table->in_use, index);
+ table->free_table[table->free_count++] = index;
+
+ simple_mtx_unlock(&table->mutex);
+}
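+
+/* Illustrative lifecycle sketch, added editorially and not part of this
+ * patch, showing how the duplicate free_table entries described above can
+ * arise: kk_query_table_remove() pushes the index onto free_table, while
+ * kk_query_table_insert() re-takes the index through the in_use bitset
+ * alone and never pops the stale copy, so a second remove pushes the same
+ * index again. The helper name is hypothetical.
+ */
+static inline void
+example_query_table_duplicate(struct kk_device *dev,
+                              struct kk_query_table *table)
+{
+   uint32_t idx;
+   if (kk_query_table_add(dev, table, 0 /* value */, &idx) != VK_SUCCESS)
+      return;
+
+   kk_query_table_remove(dev, table, idx);    /* free_table = { idx } */
+   kk_query_table_insert(dev, table, idx, 0); /* re-taken via in_use only */
+   kk_query_table_remove(dev, table, idx);    /* free_table = { idx, idx } */
+}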
diff --git a/src/kosmickrisp/vulkan/kk_query_table.h b/src/kosmickrisp/vulkan/kk_query_table.h
new file mode 100644
index 00000000000..a7d0be2cb5a
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_query_table.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_QUERY_TABLE_H
+#define KK_QUERY_TABLE_H 1
+
+#include "kk_private.h"
+
+#include "kk_bo.h"
+
+#include "util/bitset.h"
+#include "util/simple_mtx.h"
+
+struct kk_device;
+
+struct kk_query_table {
+ simple_mtx_t mutex;
+
+ uint32_t max_alloc; /**< Maximum possible number of queries */
+ uint32_t next_query; /**< Next unallocated query */
+ uint32_t free_count; /**< Size of free_table */
+
+ struct kk_bo *bo; /**< Memory where queries are stored */
+
+ /* Bitset of all queries currently in use. This is the single source
+ * of truth for what is and isn't free. The free_table and next_query are
+ * simply hints to make finding a free query fast. Every free
+ * query will either be above next_query or in free_table but not
+ * everything which satisfies those two criteria is actually free.
+ */
+ BITSET_WORD *in_use;
+
+ /* Stack for free query elements */
+ uint32_t *free_table;
+};
+
+VkResult kk_query_table_init(struct kk_device *dev,
+ struct kk_query_table *table,
+ uint32_t query_count);
+
+void kk_query_table_finish(struct kk_device *dev, struct kk_query_table *table);
+
+VkResult kk_query_table_add(struct kk_device *dev, struct kk_query_table *table,
+ uint64_t value, uint32_t *index_out);
+
+VkResult kk_query_table_insert(struct kk_device *dev,
+ struct kk_query_table *table, uint32_t index,
+ uint64_t value);
+
+void kk_query_table_remove(struct kk_device *dev, struct kk_query_table *table,
+ uint32_t index);
+
+#endif /* KK_QUERY_TABLE_H */
diff --git a/src/kosmickrisp/vulkan/kk_queue.c b/src/kosmickrisp/vulkan/kk_queue.c
new file mode 100644
index 00000000000..3d112c21cae
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_queue.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_queue.h"
+#include "kk_buffer.h"
+#include "kk_cmd_buffer.h"
+#include "kk_device.h"
+#include "kk_encoder.h"
+#include "kk_physical_device.h"
+#include "kk_sync.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+
+#include "vk_cmd_queue.h"
+
+static VkResult
+kk_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit)
+{
+ struct kk_queue *queue = container_of(vk_queue, struct kk_queue, vk);
+ struct kk_device *dev = kk_queue_device(queue);
+
+ if (vk_queue_is_lost(&queue->vk))
+ return VK_ERROR_DEVICE_LOST;
+
+ struct kk_encoder *encoder;
+ VkResult result = kk_encoder_init(dev->mtl_handle, queue, &encoder);
+ if (result != VK_SUCCESS)
+ return result;
+
+ /* Chain with the previous submission */
+ if (queue->wait_fence) {
+ util_dynarray_append(&encoder->main.fences, mtl_fence *,
+ queue->wait_fence);
+ encoder->main.wait_fence = true;
+ }
+
+ for (struct vk_sync_wait *wait = submit->waits,
+ *end = submit->waits + submit->wait_count;
+ wait != end; ++wait) {
+ struct kk_sync_timeline *sync =
+ container_of(wait->sync, struct kk_sync_timeline, base);
+ mtl_encode_wait_for_event(encoder->main.cmd_buffer, sync->mtl_handle,
+ wait->wait_value);
+ }
+
+ for (uint32_t i = 0; i < submit->command_buffer_count; ++i) {
+ struct kk_cmd_buffer *cmd_buffer =
+ container_of(submit->command_buffers[i], struct kk_cmd_buffer, vk);
+ cmd_buffer->encoder = encoder;
+ /* TODO_KOSMICKRISP We need to release command buffer resources here for
+ * the following case: User records command buffers once and then reuses
+ * them multiple times. The resource release should be done at
+ * vkBeginCommandBuffer, but because we are recording all commands to
+ * later execute them at queue submission, the recording does not record
+ * the begin/end commands and jumps straight to the actual commands. */
+ kk_cmd_release_resources(dev, cmd_buffer);
+
+ vk_cmd_queue_execute(&cmd_buffer->vk.cmd_queue,
+ kk_cmd_buffer_to_handle(cmd_buffer),
+ &dev->vk.dispatch_table);
+ kk_encoder_end(cmd_buffer);
+ cmd_buffer->encoder = NULL;
+ }
+
+ for (uint32_t i = 0u; i < submit->signal_count; ++i) {
+ struct vk_sync_signal *signal = &submit->signals[i];
+ struct kk_sync_timeline *sync =
+ container_of(signal->sync, struct kk_sync_timeline, base);
+ mtl_encode_signal_event(encoder->main.cmd_buffer, sync->mtl_handle,
+ signal->signal_value);
+ }
+
+ /* Steal the last fence to chain with the next submission */
+ if (util_dynarray_num_elements(&encoder->main.fences, mtl_fence *) > 0)
+ queue->wait_fence = util_dynarray_pop(&encoder->main.fences, mtl_fence *);
+ kk_encoder_submit(encoder);
+
+ return VK_SUCCESS;
+}
+
+VkResult
+kk_queue_init(struct kk_device *dev, struct kk_queue *queue,
+ const VkDeviceQueueCreateInfo *pCreateInfo,
+ uint32_t index_in_family)
+{
+ VkResult result;
+
+ result = vk_queue_init(&queue->vk, &dev->vk, pCreateInfo, index_in_family);
+ if (result != VK_SUCCESS)
+ return result;
+
+ queue->main.mtl_handle =
+ mtl_new_command_queue(dev->mtl_handle, KK_MAX_CMD_BUFFERS);
+ queue->pre_gfx.mtl_handle =
+ mtl_new_command_queue(dev->mtl_handle, KK_MAX_CMD_BUFFERS);
+
+ queue->vk.driver_submit = kk_queue_submit;
+
+ return VK_SUCCESS;
+}
+
+void
+kk_queue_finish(struct kk_device *dev, struct kk_queue *queue)
+{
+ if (queue->wait_fence)
+ mtl_release(queue->wait_fence);
+ mtl_release(queue->pre_gfx.mtl_handle);
+ mtl_release(queue->main.mtl_handle);
+ vk_queue_finish(&queue->vk);
+}
diff --git a/src/kosmickrisp/vulkan/kk_queue.h b/src/kosmickrisp/vulkan/kk_queue.h
new file mode 100644
index 00000000000..0633559d867
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_queue.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_QUEUE_H
+#define KK_QUEUE_H 1
+
+#include "kk_private.h"
+
+#include "kosmickrisp/bridge/mtl_types.h"
+
+#include "vk_queue.h"
+
+struct kk_queue {
+ struct vk_queue vk;
+ /* We require one queue per command buffer to avoid locking. Main handles
+ * all work, but if we are inside a render pass and need to massage inputs,
+ * pre_gfx is used to submit the compute work that handles it so we don't
+ * have to break the render encoder. */
+ struct {
+ struct mtl_command_queue *mtl_handle;
+ } main, pre_gfx;
+
+ mtl_fence *wait_fence;
+};
+
+static inline struct kk_device *
+kk_queue_device(struct kk_queue *queue)
+{
+ return (struct kk_device *)queue->vk.base.device;
+}
+
+VkResult kk_queue_init(struct kk_device *dev, struct kk_queue *queue,
+ const VkDeviceQueueCreateInfo *pCreateInfo,
+ uint32_t index_in_family);
+
+void kk_queue_finish(struct kk_device *dev, struct kk_queue *queue);
+
+#endif
diff --git a/src/kosmickrisp/vulkan/kk_sampler.c b/src/kosmickrisp/vulkan/kk_sampler.c
new file mode 100644
index 00000000000..a555cc185c6
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_sampler.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_sampler.h"
+
+#include "kk_entrypoints.h"
+#include "kk_physical_device.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+#include "kosmickrisp/bridge/vk_to_mtl_map.h"
+
+#include "vk_format.h"
+#include "vk_sampler.h"
+
+#include "util/bitpack_helpers.h"
+#include "util/format/format_utils.h"
+#include "util/format_srgb.h"
+
+static bool
+uses_border(const VkSamplerCreateInfo *info)
+{
+ return info->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
+ info->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
+ info->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
+}
+
+static bool
+is_border_color_custom(VkBorderColor color, bool workaround_rgba4)
+{
+ switch (color) {
+ case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
+ /* We may need to work around RGBA4 UNORM issues with opaque black. This
+ * only affects float opaque black; there are no pure integer RGBA4
+ * formats to worry about.
+ */
+ return workaround_rgba4;
+
+ case VK_BORDER_COLOR_INT_CUSTOM_EXT:
+ case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static struct mtl_sampler_packed
+pack_sampler_info(const struct VkSamplerCreateInfo *sampler_info)
+{
+ enum mtl_compare_function compare =
+ sampler_info->compareEnable
+ ? vk_compare_op_to_mtl_compare_function(sampler_info->compareOp)
+ : MTL_COMPARE_FUNCTION_ALWAYS;
+ enum mtl_sampler_mip_filter mip_filter =
+ sampler_info->unnormalizedCoordinates
+ ? MTL_SAMPLER_MIP_FILTER_NOT_MIP_MAPPED
+ : vk_sampler_mipmap_mode_to_mtl_sampler_mip_filter(
+ sampler_info->mipmapMode);
+ enum mtl_sampler_border_color border_color =
+ uses_border(sampler_info) ? vk_border_color_to_mtl_sampler_border_color(
+ sampler_info->borderColor)
+ : MTL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE;
+ uint32_t max_anisotropy =
+ sampler_info->anisotropyEnable
+ ? util_next_power_of_two(MAX2(sampler_info->maxAnisotropy, 1))
+ : 1u;
+ return (struct mtl_sampler_packed){
+ .mode_u = vk_sampler_address_mode_to_mtl_sampler_address_mode(
+ sampler_info->addressModeU),
+ .mode_v = vk_sampler_address_mode_to_mtl_sampler_address_mode(
+ sampler_info->addressModeV),
+ .mode_w = vk_sampler_address_mode_to_mtl_sampler_address_mode(
+ sampler_info->addressModeW),
+ .border_color = border_color,
+ .min_filter =
+ vk_filter_to_mtl_sampler_min_mag_filter(sampler_info->minFilter),
+ .mag_filter =
+ vk_filter_to_mtl_sampler_min_mag_filter(sampler_info->magFilter),
+ .mip_filter = mip_filter,
+ .compare_func = compare,
+ .min_lod = sampler_info->minLod,
+ .max_lod = sampler_info->maxLod,
+ .max_anisotropy = max_anisotropy,
+ .normalized_coordinates = !sampler_info->unnormalizedCoordinates,
+ };
+}
+
+static mtl_sampler_descriptor *
+create_sampler_descriptor(const struct mtl_sampler_packed *packed)
+{
+ mtl_sampler_descriptor *descriptor = mtl_new_sampler_descriptor();
+ mtl_sampler_descriptor_set_normalized_coordinates(
+ descriptor, packed->normalized_coordinates);
+ mtl_sampler_descriptor_set_address_mode(descriptor, packed->mode_u,
+ packed->mode_v, packed->mode_w);
+ mtl_sampler_descriptor_set_border_color(descriptor, packed->border_color);
+ mtl_sampler_descriptor_set_filters(descriptor, packed->min_filter,
+ packed->mag_filter, packed->mip_filter);
+ mtl_sampler_descriptor_set_lod_clamp(descriptor, packed->min_lod,
+ packed->max_lod);
+ mtl_sampler_descriptor_set_max_anisotropy(descriptor,
+ packed->max_anisotropy);
+ mtl_sampler_descriptor_set_compare_function(descriptor,
+ packed->compare_func);
+ return descriptor;
+}
+
+mtl_sampler *
+kk_sampler_create(struct kk_device *dev,
+ const struct mtl_sampler_packed *packed)
+{
+ mtl_sampler_descriptor *desc = create_sampler_descriptor(packed);
+ return mtl_new_sampler(dev->mtl_handle, desc);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+kk_CreateSampler(VkDevice device, const VkSamplerCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkSampler *pSampler)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VkResult result = VK_SUCCESS;
+ struct kk_sampler *sampler;
+
+ sampler =
+ vk_sampler_create(&dev->vk, pCreateInfo, pAllocator, sizeof(*sampler));
+ if (!sampler)
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ struct mtl_sampler_packed packed = pack_sampler_info(pCreateInfo);
+ result = kk_sampler_heap_add(dev, packed, &sampler->planes[0].hw);
+ if (result != VK_SUCCESS) {
+ kk_DestroySampler(device, kk_sampler_to_handle(sampler), pAllocator);
+ return result;
+ }
+ sampler->plane_count = 1;
+
+ /* In order to support CONVERSION_SEPARATE_RECONSTRUCTION_FILTER_BIT, we
+ * need multiple sampler planes: at minimum we will need one for luminance
+ * (the default), and one for chroma. Each sampler plane needs its own
+ * sampler table entry. However, sampler table entries are very rare on
+ * NVIDIA; we only have 4096 entries for the whole VkDevice, and each plane
+ * would burn one of those. So we make sure to allocate only the minimum
+ * amount that we actually need (i.e., either 1 or 2), and then just copy
+ * the last sampler plane out as far as we need to fill the number of image
+ * planes.
+ */
+
+ if (sampler->vk.ycbcr_conversion) {
+ const VkFilter chroma_filter =
+ sampler->vk.ycbcr_conversion->state.chroma_filter;
+ if (pCreateInfo->magFilter != chroma_filter ||
+ pCreateInfo->minFilter != chroma_filter) {
+ packed.min_filter = packed.mag_filter =
+ vk_filter_to_mtl_sampler_min_mag_filter(chroma_filter);
+ result = kk_sampler_heap_add(dev, packed, &sampler->planes[1].hw);
+ if (result != VK_SUCCESS) {
+ kk_DestroySampler(device, kk_sampler_to_handle(sampler),
+ pAllocator);
+ return result;
+ }
+ sampler->plane_count = 2;
+ }
+ }
+
+ /* LOD data passed in the descriptor set */
+ sampler->lod_bias_fp16 = _mesa_float_to_half(pCreateInfo->mipLodBias);
+ sampler->lod_min_fp16 = _mesa_float_to_half(pCreateInfo->minLod);
+ sampler->lod_max_fp16 = _mesa_float_to_half(pCreateInfo->maxLod);
+
+ /* Border color passed in the descriptor */
+ sampler->has_border = uses_border(pCreateInfo) &&
+ is_border_color_custom(pCreateInfo->borderColor, true);
+ if (sampler->has_border) {
+ /* We also need to record the border.
+ *
+ * If there is a border colour component mapping, we need to swizzle with
+ * it. Otherwise, we can assume there's nothing to do.
+ */
+ VkClearColorValue bc = sampler->vk.border_color_value;
+
+ const VkSamplerBorderColorComponentMappingCreateInfoEXT *swiz_info =
+ vk_find_struct_const(
+ pCreateInfo->pNext,
+ SAMPLER_BORDER_COLOR_COMPONENT_MAPPING_CREATE_INFO_EXT);
+
+ if (swiz_info) {
+ const bool is_int = vk_border_color_is_int(pCreateInfo->borderColor);
+ bc = vk_swizzle_color_value(bc, swiz_info->components, is_int);
+ }
+
+ sampler->custom_border = bc;
+ }
+
+ *pSampler = kk_sampler_to_handle(sampler);
+
+ return VK_SUCCESS;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_DestroySampler(VkDevice device, VkSampler _sampler,
+ const VkAllocationCallbacks *pAllocator)
+{
+ VK_FROM_HANDLE(kk_device, dev, device);
+ VK_FROM_HANDLE(kk_sampler, sampler, _sampler);
+
+ if (!sampler)
+ return;
+
+ for (uint8_t plane = 0; plane < sampler->plane_count; plane++)
+ kk_sampler_heap_remove(dev, sampler->planes[plane].hw);
+
+ vk_sampler_destroy(&dev->vk, pAllocator, &sampler->vk);
+}
diff --git a/src/kosmickrisp/vulkan/kk_sampler.h b/src/kosmickrisp/vulkan/kk_sampler.h
new file mode 100644
index 00000000000..15f3de9e582
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_sampler.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_SAMPLER_H
+#define KK_SAMPLER_H 1
+
+#include "kk_device.h"
+#include "kk_physical_device.h"
+#include "kk_private.h"
+
+#include "kosmickrisp/bridge/mtl_types.h"
+
+#include "vk_sampler.h"
+#include "vk_ycbcr_conversion.h"
+
+#include "vk_format.h"
+
+struct kk_sampler {
+ struct vk_sampler vk;
+ VkClearColorValue custom_border;
+ bool has_border;
+
+ uint8_t plane_count;
+ uint16_t lod_bias_fp16;
+ uint16_t lod_min_fp16;
+ uint16_t lod_max_fp16;
+
+ struct {
+ struct kk_rc_sampler *hw;
+ } planes[2];
+};
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(kk_sampler, vk.base, VkSampler,
+ VK_OBJECT_TYPE_SAMPLER)
+
+#endif /* KK_SAMPLER_H */
diff --git a/src/kosmickrisp/vulkan/kk_shader.c b/src/kosmickrisp/vulkan/kk_shader.c
new file mode 100644
index 00000000000..2f723599655
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_shader.c
@@ -0,0 +1,1278 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_shader.h"
+
+#include "kk_cmd_buffer.h"
+#include "kk_descriptor_set_layout.h"
+#include "kk_debug.h"
+#include "kk_device.h"
+#include "kk_format.h"
+#include "kk_nir_lower_vbo.h"
+#include "kk_physical_device.h"
+#include "kk_sampler.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+#include "kosmickrisp/bridge/vk_to_mtl_map.h"
+#include "kosmickrisp/compiler/nir_to_msl.h"
+
+#include "nir_builder.h"
+#include "nir_lower_blend.h"
+
+#include "vk_blend.h"
+#include "vk_format.h"
+#include "vk_graphics_state.h"
+#include "vk_nir_convert_ycbcr.h"
+#include "vk_pipeline.h"
+
+static const nir_shader_compiler_options *
+kk_get_nir_options(struct vk_physical_device *vk_pdev, mesa_shader_stage stage,
+ UNUSED const struct vk_pipeline_robustness_state *rs)
+{
+ static nir_shader_compiler_options options = {
+ .lower_fdph = true,
+ .has_fsub = true,
+ .has_isub = true,
+ .lower_extract_word = true,
+ .lower_extract_byte = true,
+ .lower_insert_word = true,
+ .lower_insert_byte = true,
+ .lower_fmod = true,
+ .discard_is_demote = true,
+ .instance_id_includes_base_index = true,
+ .lower_device_index_to_zero = true,
+ .lower_pack_64_2x32_split = true,
+ .lower_unpack_64_2x32_split = true,
+ .lower_pack_64_2x32 = true,
+ .lower_pack_half_2x16 = true,
+ .lower_pack_split = true,
+ .lower_unpack_half_2x16 = true,
+ .has_cs_global_id = true,
+ .lower_vector_cmp = true,
+ .lower_fquantize2f16 = true,
+ .lower_scmp = true,
+ .lower_ifind_msb = true,
+ .lower_ufind_msb = true,
+ .lower_find_lsb = true,
+ .has_uclz = true,
+ .lower_mul_2x32_64 = true,
+ .lower_uadd_carry = true,
+ .lower_usub_borrow = true,
+ /* Metal does not support double. */
+ .lower_doubles_options = (nir_lower_doubles_options)(~0),
+ .lower_int64_options =
+ nir_lower_ufind_msb64 | nir_lower_subgroup_shuffle64,
+ };
+ return &options;
+}
+
+static struct spirv_to_nir_options
+kk_get_spirv_options(struct vk_physical_device *vk_pdev,
+ UNUSED mesa_shader_stage stage,
+ const struct vk_pipeline_robustness_state *rs)
+{
+ return (struct spirv_to_nir_options){
+ .environment = NIR_SPIRV_VULKAN,
+ .ssbo_addr_format = nir_address_format_64bit_bounded_global,
+ .phys_ssbo_addr_format = nir_address_format_64bit_global,
+ .ubo_addr_format = nir_address_format_64bit_bounded_global,
+ .shared_addr_format = nir_address_format_32bit_offset,
+ .min_ssbo_alignment = KK_MIN_SSBO_ALIGNMENT,
+ .min_ubo_alignment = KK_MIN_UBO_ALIGNMENT,
+ };
+}
+
+static void
+kk_preprocess_nir(UNUSED struct vk_physical_device *vk_pdev, nir_shader *nir,
+ UNUSED const struct vk_pipeline_robustness_state *rs)
+{
+ /* Gather info before preprocess_nir but after some general lowering, so
+ * inputs_read and system_values_read are accurately set.
+ */
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+ /* nir_lower_io_to_temporaries is required before nir_lower_blend since the
+ * blending pass sinks writes to the end of the block where we may have a
+ * jump, which is illegal.
+ */
+ NIR_PASS(_, nir, nir_lower_io_vars_to_temporaries,
+ nir_shader_get_entrypoint(nir), true, false);
+
+ msl_preprocess_nir(nir);
+}
+
+struct kk_vs_key {
+ bool is_points;
+};
+
+static void
+kk_populate_vs_key(struct kk_vs_key *key,
+ const struct vk_graphics_pipeline_state *state)
+{
+ memset(key, 0, sizeof(*key));
+ key->is_points =
+ (state->ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST);
+}
+
+struct kk_fs_key {
+ VkFormat color_formats[MESA_VK_MAX_COLOR_ATTACHMENTS];
+ struct vk_color_blend_state color_blend;
+ uint32_t rasterization_samples;
+ uint16_t static_sample_mask;
+ bool has_depth;
+};
+
+static void
+kk_populate_fs_key(struct kk_fs_key *key,
+ const struct vk_graphics_pipeline_state *state)
+{
+ memset(key, 0, sizeof(*key));
+
+ /* Required since we [de]serialize blend, and render target swizzle for
+ * non-native formats */
+ memcpy(key->color_formats, state->rp->color_attachment_formats,
+ sizeof(key->color_formats));
+
+ /* Blend state gets [de]serialized, so we need to hash it */
+ if (state->cb)
+ key->color_blend = *(state->cb);
+
+ if (state->ms) {
+ key->rasterization_samples = state->ms->rasterization_samples;
+ key->static_sample_mask = state->ms->sample_mask;
+ }
+
+ /* Depth writes are removed unless there's an actual attachment */
+ key->has_depth = state->rp->depth_attachment_format != VK_FORMAT_UNDEFINED;
+}
+
+static void
+kk_hash_graphics_state(struct vk_physical_device *device,
+ const struct vk_graphics_pipeline_state *state,
+ const struct vk_features *enabled_features,
+ VkShaderStageFlags stages, blake3_hash blake3_out)
+{
+ struct mesa_blake3 blake3_ctx;
+ _mesa_blake3_init(&blake3_ctx);
+
+ if (stages & VK_SHADER_STAGE_VERTEX_BIT) {
+ struct kk_vs_key key;
+ kk_populate_vs_key(&key, state);
+ _mesa_blake3_update(&blake3_ctx, &key, sizeof(key));
+ }
+
+ if (stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
+ struct kk_fs_key key;
+ kk_populate_fs_key(&key, state);
+ _mesa_blake3_update(&blake3_ctx, &key, sizeof(key));
+
+ _mesa_blake3_update(&blake3_ctx, &state->rp->view_mask,
+ sizeof(state->rp->view_mask));
+ }
+
+ _mesa_blake3_final(&blake3_ctx, blake3_out);
+}
+
+static void
+shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
+{
+ assert(glsl_type_is_vector_or_scalar(type));
+
+ uint32_t comp_size =
+ glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
+ unsigned length = glsl_get_vector_elements(type);
+ *size = comp_size * length, *align = comp_size;
+}
+
+struct lower_ycbcr_state {
+ uint32_t set_layout_count;
+ struct vk_descriptor_set_layout *const *set_layouts;
+};
+
+static const struct vk_ycbcr_conversion_state *
+lookup_ycbcr_conversion(const void *_state, uint32_t set, uint32_t binding,
+ uint32_t array_index)
+{
+ const struct lower_ycbcr_state *state = _state;
+ assert(set < state->set_layout_count);
+ assert(state->set_layouts[set] != NULL);
+ const struct kk_descriptor_set_layout *set_layout =
+ vk_to_kk_descriptor_set_layout(state->set_layouts[set]);
+ assert(binding < set_layout->binding_count);
+
+ const struct kk_descriptor_set_binding_layout *bind_layout =
+ &set_layout->binding[binding];
+
+ if (bind_layout->immutable_samplers == NULL)
+ return NULL;
+
+ array_index = MIN2(array_index, bind_layout->array_size - 1);
+
+ const struct kk_sampler *sampler =
+ bind_layout->immutable_samplers[array_index];
+
+ return sampler && sampler->vk.ycbcr_conversion
+ ? &sampler->vk.ycbcr_conversion->state
+ : NULL;
+}
+
+static int
+type_size_vec4(const struct glsl_type *type, bool bindless)
+{
+ return glsl_count_attribute_slots(type, false);
+}
+
+static bool
+kk_nir_swizzle_fragment_output(nir_builder *b, nir_intrinsic_instr *intrin,
+ void *data)
+{
+ if (intrin->intrinsic != nir_intrinsic_store_output &&
+ intrin->intrinsic != nir_intrinsic_load_output)
+ return false;
+
+ unsigned slot = nir_intrinsic_io_semantics(intrin).location;
+ if (slot < FRAG_RESULT_DATA0)
+ return false;
+
+ const struct vk_graphics_pipeline_state *state =
+ (const struct vk_graphics_pipeline_state *)data;
+ VkFormat vk_format =
+ state->rp->color_attachment_formats[slot - FRAG_RESULT_DATA0];
+ if (vk_format == VK_FORMAT_UNDEFINED)
+ return false;
+
+ enum pipe_format format = vk_format_to_pipe_format(vk_format);
+ const struct kk_va_format *supported_format = kk_get_va_format(format);
+
+ /* Check if we have to apply any swizzle */
+ if (!supported_format->is_native) {
+ unsigned channel_swizzle[] = {
+ supported_format->swizzle.red, supported_format->swizzle.green,
+ supported_format->swizzle.blue, supported_format->swizzle.alpha};
+
+ if (intrin->intrinsic == nir_intrinsic_store_output) {
+ b->cursor = nir_before_instr(&intrin->instr);
+ nir_def *to_replace = intrin->src[0].ssa;
+ nir_def *swizzled = nir_swizzle(b, to_replace, channel_swizzle,
+ to_replace->num_components);
+ nir_src_rewrite(&intrin->src[0], swizzled);
+ } else {
+ unsigned channel_unswizzle[4] = {0u};
+ for (uint32_t i = 0u; i < 4; ++i)
+ channel_unswizzle[channel_swizzle[i]] = i;
+
+ b->cursor = nir_after_instr(&intrin->instr);
+ nir_def *to_replace = &intrin->def;
+ nir_def *swizzled = nir_swizzle(b, to_replace, channel_unswizzle,
+ to_replace->num_components);
+ nir_def_rewrite_uses_after(to_replace, swizzled);
+ }
+ return true;
+ }
+
+ return false;
+}
+
+static void
+kk_lower_vs_vbo(nir_shader *nir, const struct vk_graphics_pipeline_state *state)
+{
+ assert(!(nir->info.inputs_read & BITFIELD64_MASK(VERT_ATTRIB_GENERIC0)) &&
+ "Fixed-function attributes not used in Vulkan");
+ NIR_PASS(_, nir, nir_recompute_io_bases, nir_var_shader_in);
+ /* the shader_out portion of this is load-bearing even for tess eval */
+ NIR_PASS(_, nir, nir_io_add_const_offset_to_base,
+ nir_var_shader_in | nir_var_shader_out);
+
+ struct kk_attribute attributes[KK_MAX_ATTRIBS] = {};
+ uint64_t attribs_read = nir->info.inputs_read >> VERT_ATTRIB_GENERIC0;
+ u_foreach_bit(i, state->vi->attributes_valid) {
+ const struct vk_vertex_attribute_state *attr = &state->vi->attributes[i];
+ assert(state->vi->bindings_valid & BITFIELD_BIT(attr->binding));
+ const struct vk_vertex_binding_state *binding =
+ &state->vi->bindings[attr->binding];
+
+ /* nir_assign_io_var_locations compacts vertex inputs, eliminating
+ * unused inputs. We need to do the same here to match the locations.
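+ * For example, if attribs_read = 0b1011, attribute i = 3 lands in slot
+ * util_bitcount64(0b1011 & BITFIELD_MASK(3)) = 2.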
+ */
+ unsigned slot = util_bitcount64(attribs_read & BITFIELD_MASK(i));
+ attributes[slot].divisor = binding->divisor;
+ attributes[slot].binding = attr->binding;
+ attributes[slot].format = vk_format_to_pipe_format(attr->format);
+ attributes[slot].buf = attr->binding;
+ attributes[slot].instanced =
+ binding->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE;
+ }
+ NIR_PASS(_, nir, kk_nir_lower_vbo, attributes);
+}
+
+static void
+kk_lower_vs(nir_shader *nir, const struct vk_graphics_pipeline_state *state)
+{
+ if (state->ia->primitive_topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST)
+ nir_shader_intrinsics_pass(nir, msl_nir_vs_remove_point_size_write,
+ nir_metadata_control_flow, NULL);
+
+ NIR_PASS(_, nir, msl_nir_layer_id_type);
+}
+
+static void
+kk_lower_fs_blend(nir_shader *nir,
+ const struct vk_graphics_pipeline_state *state)
+{
+ nir_lower_blend_options opts = {
+ .scalar_blend_const = false,
+ .logicop_enable = state->cb->logic_op_enable,
+ .logicop_func = state->cb->logic_op,
+ };
+
+ static_assert(ARRAY_SIZE(opts.format) == 8, "max RTs out of sync");
+
+ for (unsigned i = 0; i < ARRAY_SIZE(opts.format); ++i) {
+ opts.format[i] =
+ vk_format_to_pipe_format(state->rp->color_attachment_formats[i]);
+ if (state->cb->attachments[i].blend_enable) {
+ opts.rt[i] = (nir_lower_blend_rt){
+ .rgb.src_factor = vk_blend_factor_to_pipe(
+ state->cb->attachments[i].src_color_blend_factor),
+ .rgb.dst_factor = vk_blend_factor_to_pipe(
+ state->cb->attachments[i].dst_color_blend_factor),
+ .rgb.func =
+ vk_blend_op_to_pipe(state->cb->attachments[i].color_blend_op),
+
+ .alpha.src_factor = vk_blend_factor_to_pipe(
+ state->cb->attachments[i].src_alpha_blend_factor),
+ .alpha.dst_factor = vk_blend_factor_to_pipe(
+ state->cb->attachments[i].dst_alpha_blend_factor),
+ .alpha.func =
+ vk_blend_op_to_pipe(state->cb->attachments[i].alpha_blend_op),
+
+ .colormask = state->cb->attachments[i].write_mask,
+ };
+ } else {
+ opts.rt[i] = (nir_lower_blend_rt){
+ .rgb.src_factor = PIPE_BLENDFACTOR_ONE,
+ .rgb.dst_factor = PIPE_BLENDFACTOR_ZERO,
+ .rgb.func = PIPE_BLEND_ADD,
+
+ .alpha.src_factor = PIPE_BLENDFACTOR_ONE,
+ .alpha.dst_factor = PIPE_BLENDFACTOR_ZERO,
+ .alpha.func = PIPE_BLEND_ADD,
+
+ .colormask = state->cb->attachments[i].write_mask,
+ };
+ }
+ }
+ NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_out);
+ NIR_PASS(_, nir, nir_lower_blend, &opts);
+}
+
+static bool
+lower_subpass_dim(nir_builder *b, nir_tex_instr *tex, UNUSED void *_data)
+{
+ if (tex->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS)
+ tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+ else if (tex->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS)
+ tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ else
+ return false;
+
+ return true;
+}
+
+static void
+kk_lower_fs(nir_shader *nir, const struct vk_graphics_pipeline_state *state)
+{
+ if (state->cb)
+ kk_lower_fs_blend(nir, state);
+
+ if (state->rp->depth_attachment_format == VK_FORMAT_UNDEFINED ||
+ nir->info.fs.early_fragment_tests)
+ NIR_PASS(_, nir, nir_shader_intrinsics_pass,
+ msl_nir_fs_remove_depth_write, nir_metadata_control_flow, NULL);
+
+ /* Input attachments are treated as 2D textures; fix up the sampler dimension. */
+ NIR_PASS(_, nir, nir_shader_tex_pass, lower_subpass_dim, nir_metadata_all,
+ NULL);
+
+ /* Swizzle non-native formats' outputs */
+ NIR_PASS(_, nir, nir_shader_intrinsics_pass, kk_nir_swizzle_fragment_output,
+ nir_metadata_control_flow, (void *)state);
+
+ /* Metal's sample mask is uint. */
+ NIR_PASS(_, nir, msl_nir_sample_mask_type);
+
+ if (state->ms && state->ms->rasterization_samples &&
+ state->ms->sample_mask != UINT16_MAX)
+ NIR_PASS(_, nir, msl_lower_static_sample_mask, state->ms->sample_mask);
+ /* Check https://github.com/KhronosGroup/Vulkan-Portability/issues/54 for
+ * an explanation of why we need this. */
+ else if (nir->info.fs.needs_full_quad_helper_invocations ||
+ nir->info.fs.needs_coarse_quad_helper_invocations)
+ NIR_PASS(_, nir, msl_lower_static_sample_mask, 0xFFFFFFFF);
+}
+
+static void
+kk_lower_nir(struct kk_device *dev, nir_shader *nir,
+ const struct vk_pipeline_robustness_state *rs,
+ uint32_t set_layout_count,
+ struct vk_descriptor_set_layout *const *set_layouts,
+ const struct vk_graphics_pipeline_state *state)
+{
+ /* Massage IO related variables to please Metal */
+ if (nir->info.stage == MESA_SHADER_VERTEX) {
+ NIR_PASS(_, nir, kk_nir_lower_vs_multiview, state->rp->view_mask);
+
+ /* kk_nir_lower_vs_multiview may create a temporary array to assign the
+ * correct view index. Since we don't handle derefs, we need to get rid of
+ * them. */
+ NIR_PASS(_, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 0,
+ glsl_get_natural_size_align_bytes,
+ glsl_get_natural_size_align_bytes);
+
+ NIR_PASS(_, nir, msl_ensure_vertex_position_output);
+ } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ enum pipe_format rts[MAX_DRAW_BUFFERS] = {PIPE_FORMAT_NONE};
+ const struct vk_render_pass_state *rp = state->rp;
+ for (uint32_t i = 0u; i < MAX_DRAW_BUFFERS; ++i)
+ rts[i] = vk_format_to_pipe_format(rp->color_attachment_formats[i]);
+
+ NIR_PASS(_, nir, msl_nir_fs_force_output_signedness, rts);
+
+ NIR_PASS(_, nir, kk_nir_lower_fs_multiview, state->rp->view_mask);
+
+ if (state->rp->depth_attachment_format != VK_FORMAT_UNDEFINED &&
+ state->ial && state->ial->depth_att != MESA_VK_ATTACHMENT_NO_INDEX) {
+ NIR_PASS(_, nir, msl_ensure_depth_write);
+ }
+ }
+
+ const struct lower_ycbcr_state ycbcr_state = {
+ .set_layout_count = set_layout_count,
+ .set_layouts = set_layouts,
+ };
+ NIR_PASS(_, nir, nir_vk_lower_ycbcr_tex, lookup_ycbcr_conversion,
+ &ycbcr_state);
+
+ /* Common msl texture lowering needs to happen after ycbcr lowering and
+ * before descriptor lowering. */
+ NIR_PASS(_, nir, msl_lower_textures);
+
+ /* Lower push constants before lower_descriptors */
+ NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
+ nir_address_format_32bit_offset);
+
+ NIR_PASS(_, nir, nir_lower_memory_model);
+
+ NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
+ nir_address_format_64bit_global);
+ NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
+ nir_address_format_64bit_bounded_global);
+ NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
+ nir_address_format_64bit_bounded_global);
+
+ NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
+ type_size_vec4,
+ nir_lower_io_lower_64bit_to_32 |
+ nir_lower_io_use_interpolated_input_intrinsics);
+
+ if (!nir->info.shared_memory_explicit_layout) {
+ NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared,
+ shared_var_info);
+ }
+ NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared,
+ nir_address_format_32bit_offset);
+
+ if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) {
+ /* QMD::SHARED_MEMORY_SIZE requires an alignment of 256B so it's safe to
+ * align everything up to 16B so we can write whole vec4s.
+ */
+ nir->info.shared_size = align(nir->info.shared_size, 16);
+ NIR_PASS(_, nir, nir_zero_initialize_shared_memory, nir->info.shared_size,
+ 16);
+
+ /* We need to call lower_compute_system_values again because
+ * nir_zero_initialize_shared_memory generates load_invocation_id which
+ * has to be lowered to load_invocation_index.
+ */
+ NIR_PASS(_, nir, nir_lower_compute_system_values, NULL);
+ }
+
+ NIR_PASS(_, nir, nir_opt_dce);
+ NIR_PASS(_, nir, nir_lower_variable_initializers, ~nir_var_function_temp);
+ NIR_PASS(_, nir, nir_remove_dead_variables,
+ nir_var_shader_in | nir_var_shader_out | nir_var_system_value,
+ NULL);
+ nir->info.io_lowered = true;
+
+ /* Required before kk_nir_lower_vbo so load_input intrinsics' parents are
+ * load_const; otherwise the pass will complain. */
+ NIR_PASS(_, nir, nir_opt_constant_folding);
+
+ /* These passes operate on lowered IO. */
+ if (nir->info.stage == MESA_SHADER_VERTEX) {
+ kk_lower_vs(nir, state);
+ } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ kk_lower_fs(nir, state);
+ }
+
+ /* Descriptor lowering needs to happen after lowering blend since we will
+ * generate a nir_intrinsic_load_blend_const_color_rgba which gets lowered by
+ * the lower descriptor pass
+ */
+ NIR_PASS(_, nir, kk_nir_lower_descriptors, rs, set_layout_count,
+ set_layouts);
+ NIR_PASS(_, nir, kk_nir_lower_textures);
+
+ NIR_PASS(_, nir, nir_lower_global_vars_to_local);
+}
+
+static const struct vk_shader_ops kk_shader_ops;
+
+static void
+kk_shader_destroy(struct vk_device *vk_dev, struct vk_shader *vk_shader,
+ const VkAllocationCallbacks *pAllocator)
+{
+ struct kk_device *dev = container_of(vk_dev, struct kk_device, vk);
+ struct kk_shader *shader = container_of(vk_shader, struct kk_shader, vk);
+
+ if (shader->pipeline.cs) {
+ mtl_release(shader->pipeline.cs);
+ } else if (shader->pipeline.gfx.handle) {
+ mtl_release(shader->pipeline.gfx.handle);
+ if (shader->pipeline.gfx.mtl_depth_stencil_state_handle)
+ mtl_release(shader->pipeline.gfx.mtl_depth_stencil_state_handle);
+ shader->pipeline.gfx.handle = NULL;
+ shader->pipeline.gfx.mtl_depth_stencil_state_handle = NULL;
+ }
+
+ ralloc_free((void *)shader->msl_code);
+ ralloc_free((void *)shader->entrypoint_name);
+
+ vk_shader_free(&dev->vk, pAllocator, &shader->vk);
+}
+
+static bool
+gather_vs_inputs(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+ if (intr->intrinsic != nir_intrinsic_load_input)
+ return false;
+
+ struct nir_io_semantics io = nir_intrinsic_io_semantics(intr);
+ BITSET_WORD *attribs_read = data;
+ BITSET_SET(attribs_read, (io.location - VERT_ATTRIB_GENERIC0));
+ return false;
+}
+
+static void
+gather_shader_info(struct kk_shader *shader, nir_shader *nir,
+ const struct vk_graphics_pipeline_state *state)
+{
+ shader->info.stage = nir->info.stage;
+ if (nir->info.stage == MESA_SHADER_VERTEX) {
+ nir_shader_intrinsics_pass(nir, gather_vs_inputs, nir_metadata_all,
+ &shader->info.vs.attribs_read);
+ } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ /* Some meta shaders like vk-meta-resolve will have depth_layout as NONE,
+ * which is not a valid Metal layout */
+ if (nir->info.fs.depth_layout == FRAG_DEPTH_LAYOUT_NONE)
+ nir->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
+ } else if (nir->info.stage == MESA_SHADER_COMPUTE) {
+ shader->info.cs.local_size.x = nir->info.workgroup_size[0];
+ shader->info.cs.local_size.y = nir->info.workgroup_size[1];
+ shader->info.cs.local_size.z = nir->info.workgroup_size[2];
+ }
+}
+
+static void
+modify_nir_info(nir_shader *nir)
+{
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+ if (nir->info.stage == MESA_SHADER_VERTEX) {
+ /* Vertex attribute fetch is done in shader through argument buffers. */
+ nir->info.inputs_read = 0u;
+ } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ /* Some meta shaders like vk-meta-resolve will have depth_layout as NONE,
+ * which is not a valid Metal layout */
+ if (nir->info.fs.depth_layout == FRAG_DEPTH_LAYOUT_NONE)
+ nir->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
+
+ /* These values are part of the declaration and go with IO. We only
+ * require the instructions to understand the interpolation mode. */
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE);
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE);
+ }
+}
+
+static VkResult
+kk_compile_shader(struct kk_device *dev, struct vk_shader_compile_info *info,
+ const struct vk_graphics_pipeline_state *state,
+ const VkAllocationCallbacks *pAllocator,
+ struct vk_shader **shader_out)
+{
+ struct kk_shader *shader;
+ VkResult result = VK_SUCCESS;
+
+ /* We consume the NIR, regardless of success or failure */
+ nir_shader *nir = info->nir;
+
+ shader = vk_shader_zalloc(&dev->vk, &kk_shader_ops, info->stage, pAllocator,
+ sizeof(*shader));
+ if (shader == NULL) {
+ ralloc_free(nir);
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ if (nir->info.io_lowered == false)
+ kk_lower_nir(dev, nir, info->robustness, info->set_layout_count,
+ info->set_layouts, state);
+
+ gather_shader_info(shader, nir, state);
+
+ /* VBO lowering needs to go here; otherwise, the linking step removes all
+ * inputs since we read vertex attributes from UBOs. */
+ if (info->stage == MESA_SHADER_VERTEX) {
+ kk_lower_vs_vbo(nir, state);
+ }
+ msl_optimize_nir(nir);
+ modify_nir_info(nir);
+ shader->msl_code = nir_to_msl(nir, NULL);
+ const char *entrypoint_name = nir_shader_get_entrypoint(nir)->function->name;
+
+ /* We need to steal it so it doesn't get destroyed with the nir. This must
+ * after nir_to_msl since that's where we rename the entrypoint.
+ */
+ ralloc_steal(NULL, (void *)entrypoint_name);
+ shader->entrypoint_name = entrypoint_name;
+
+ if (KK_DEBUG(MSL))
+ mesa_logi("%s\n", shader->msl_code);
+
+ ralloc_free(nir);
+
+ *shader_out = &shader->vk;
+
+ return result;
+}
+
+static const struct vk_pipeline_robustness_state rs_none = {
+ .uniform_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
+ .storage_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
+ .images = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT,
+};
+
+VkResult
+kk_compile_nir_shader(struct kk_device *dev, nir_shader *nir,
+ const VkAllocationCallbacks *alloc,
+ struct kk_shader **shader_out)
+{
+ const struct kk_physical_device *pdev = kk_device_physical(dev);
+
+ assert(nir->info.stage == MESA_SHADER_COMPUTE);
+ if (nir->options == NULL)
+ nir->options = kk_get_nir_options((struct vk_physical_device *)&pdev->vk,
+ nir->info.stage, &rs_none);
+
+ struct vk_shader_compile_info info = {
+ .stage = nir->info.stage,
+ .nir = nir,
+ .robustness = &rs_none,
+ };
+
+ struct vk_shader *shader = NULL;
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+ VkResult result = kk_compile_shader(dev, &info, NULL, alloc, &shader);
+ if (result != VK_SUCCESS)
+ return result;
+
+ *shader_out = container_of(shader, struct kk_shader, vk);
+
+ return VK_SUCCESS;
+}
+
+static void
+nir_opts(nir_shader *nir)
+{
+ bool progress;
+
+ do {
+ progress = false;
+
+ NIR_PASS(progress, nir, nir_opt_loop);
+ NIR_PASS(progress, nir, nir_copy_prop);
+ NIR_PASS(progress, nir, nir_opt_remove_phis);
+ NIR_PASS(progress, nir, nir_opt_dce);
+
+ NIR_PASS(progress, nir, nir_opt_if, 0);
+ NIR_PASS(progress, nir, nir_opt_dead_cf);
+ NIR_PASS(progress, nir, nir_opt_cse);
+
+ NIR_PASS(progress, nir, nir_opt_peephole_select,
+ &(nir_opt_peephole_select_options){
+ .limit = 8,
+ .expensive_alu_ok = true,
+ .discard_ok = true,
+ });
+
+ NIR_PASS(progress, nir, nir_opt_phi_precision);
+ NIR_PASS(progress, nir, nir_opt_algebraic);
+ NIR_PASS(progress, nir, nir_opt_constant_folding);
+ NIR_PASS(progress, nir, nir_io_add_const_offset_to_base,
+ nir_var_shader_in | nir_var_shader_out);
+
+ NIR_PASS(progress, nir, nir_opt_undef);
+ NIR_PASS(progress, nir, nir_opt_loop_unroll);
+ } while (progress);
+}
+
+static nir_shader *
+get_empty_nir(struct kk_device *dev, mesa_shader_stage stage,
+ const struct vk_graphics_pipeline_state *state)
+{
+ nir_shader *nir = nir_shader_create(
+ NULL, stage,
+ kk_get_nir_options(&kk_device_physical(dev)->vk, stage, NULL));
+
+ nir_function *function = nir_function_create(nir, "main_entrypoint");
+ function->is_entrypoint = true;
+ nir_function_impl_create(function);
+
+ const struct vk_pipeline_robustness_state no_robustness = {
+ .storage_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED,
+ .uniform_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED,
+ .vertex_inputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED,
+ .images = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED,
+ .null_uniform_buffer_descriptor = false,
+ .null_storage_buffer_descriptor = false,
+ };
+ kk_lower_nir(dev, nir, &no_robustness, 0u, NULL, state);
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+ return nir;
+}
+
+static VkResult
+kk_compile_compute_pipeline(struct kk_device *device, struct kk_shader *shader)
+{
+ uint32_t local_size_threads = shader->info.cs.local_size.x *
+ shader->info.cs.local_size.y *
+ shader->info.cs.local_size.z;
+ mtl_library *library = mtl_new_library(device->mtl_handle, shader->msl_code);
+ if (library == NULL)
+ return VK_ERROR_INVALID_SHADER_NV;
+
+ mtl_function *function =
+ mtl_new_function_with_name(library, shader->entrypoint_name);
+ shader->pipeline.cs = mtl_new_compute_pipeline_state(
+ device->mtl_handle, function, local_size_threads);
+ mtl_release(function);
+ mtl_release(library);
+
+ if (shader->pipeline.cs == NULL)
+ return VK_ERROR_INVALID_SHADER_NV;
+
+ return VK_SUCCESS;
+}
+
+static bool
+has_static_depth_stencil_state(const struct vk_graphics_pipeline_state *state)
+{
+ if (!state->ds)
+ return false;
+
+ return !(
+ BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) |
+ BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) |
+ BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) |
+ BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) |
+ BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_OP) |
+ BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) |
+ BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK));
+}
+
+mtl_depth_stencil_state *
+kk_compile_depth_stencil_state(struct kk_device *device,
+ const struct vk_depth_stencil_state *ds,
+ bool has_depth, bool has_stencil)
+{
+ mtl_stencil_descriptor *front = NULL;
+ mtl_stencil_descriptor *back = NULL;
+ mtl_depth_stencil_descriptor *descriptor =
+ mtl_new_depth_stencil_descriptor();
+ if (has_depth && ds->depth.test_enable) {
+ mtl_depth_stencil_descriptor_set_depth_write_enabled(
+ descriptor, ds->depth.write_enable);
+ mtl_depth_stencil_descriptor_set_depth_compare_function(
+ descriptor, ds->depth.compare_op);
+ } else {
+ /* Only way to disable is to always pass */
+ mtl_depth_stencil_descriptor_set_depth_write_enabled(descriptor, false);
+ mtl_depth_stencil_descriptor_set_depth_compare_function(
+ descriptor, VK_COMPARE_OP_ALWAYS);
+ }
+
+ if (has_stencil && ds->stencil.test_enable) {
+ back = mtl_new_stencil_descriptor();
+ mtl_stencil_descriptor_set_depth_failure_operation(
+ back, ds->stencil.back.op.depth_fail);
+ mtl_stencil_descriptor_set_stencil_failure_operation(
+ back, ds->stencil.back.op.fail);
+ mtl_stencil_descriptor_set_depth_stencil_pass_operation(
+ back, ds->stencil.back.op.pass);
+ mtl_stencil_descriptor_set_stencil_compare_function(
+ back, ds->stencil.back.op.compare);
+ mtl_stencil_descriptor_set_read_mask(back, ds->stencil.back.compare_mask);
+ mtl_stencil_descriptor_set_write_mask(back, ds->stencil.back.write_mask);
+ mtl_depth_stencil_descriptor_set_back_face_stencil(descriptor, back);
+
+ front = mtl_new_stencil_descriptor();
+ mtl_stencil_descriptor_set_depth_failure_operation(
+ front, ds->stencil.front.op.depth_fail);
+ mtl_stencil_descriptor_set_stencil_failure_operation(
+ front, ds->stencil.front.op.fail);
+ mtl_stencil_descriptor_set_depth_stencil_pass_operation(
+ front, ds->stencil.front.op.pass);
+ mtl_stencil_descriptor_set_stencil_compare_function(
+ front, ds->stencil.front.op.compare);
+ mtl_stencil_descriptor_set_read_mask(front,
+ ds->stencil.front.compare_mask);
+ mtl_stencil_descriptor_set_write_mask(front,
+ ds->stencil.front.write_mask);
+ mtl_depth_stencil_descriptor_set_front_face_stencil(descriptor, front);
+ }
+
+ mtl_depth_stencil_state *state =
+ mtl_new_depth_stencil_state(device->mtl_handle, descriptor);
+
+ if (front)
+ mtl_release(front);
+ if (back)
+ mtl_release(back);
+ mtl_release(descriptor);
+
+ return state;
+}
+
+/* TODO_KOSMICKRISP For now we just support vertex and fragment */
+static VkResult
+kk_compile_graphics_pipeline(struct kk_device *device,
+ struct kk_shader *vertex_shader,
+ struct kk_shader *fragment_shader,
+ const struct vk_graphics_pipeline_state *state)
+{
+ VkResult result = VK_SUCCESS;
+
+ assert(vertex_shader->info.stage == MESA_SHADER_VERTEX &&
+ fragment_shader->info.stage == MESA_SHADER_FRAGMENT);
+
+ mtl_library *vertex_library =
+ mtl_new_library(device->mtl_handle, vertex_shader->msl_code);
+ if (vertex_library == NULL)
+ return VK_ERROR_INVALID_SHADER_NV;
+
+ mtl_function *vertex_function = mtl_new_function_with_name(
+ vertex_library, vertex_shader->entrypoint_name);
+
+ mtl_library *fragment_library =
+ mtl_new_library(device->mtl_handle, fragment_shader->msl_code);
+ if (fragment_library == NULL) {
+ result = VK_ERROR_INVALID_SHADER_NV;
+ goto destroy_vertex;
+ }
+ mtl_function *fragment_function = mtl_new_function_with_name(
+ fragment_library, fragment_shader->entrypoint_name);
+
+ mtl_render_pipeline_descriptor *pipeline_descriptor =
+ mtl_new_render_pipeline_descriptor();
+ mtl_render_pipeline_descriptor_set_vertex_shader(pipeline_descriptor,
+ vertex_function);
+ if (fragment_function)
+ mtl_render_pipeline_descriptor_set_fragment_shader(pipeline_descriptor,
+ fragment_function);
+ /* Layered rendering in Metal requires setting the input primitive topology class */
+ mtl_render_pipeline_descriptor_set_input_primitive_topology(
+ pipeline_descriptor,
+ vk_primitive_topology_to_mtl_primitive_topology_class(
+ state->ia->primitive_topology));
+
+ for (uint8_t i = 0; i < state->rp->color_attachment_count; ++i) {
+ if (state->rp->color_attachment_formats[i] != VK_FORMAT_UNDEFINED)
+ mtl_render_pipeline_descriptor_set_color_attachment_format(
+ pipeline_descriptor, i,
+ vk_format_to_mtl_pixel_format(
+ state->rp->color_attachment_formats[i]));
+ }
+
+ if (state->rp->depth_attachment_format != VK_FORMAT_UNDEFINED)
+ mtl_render_pipeline_descriptor_set_depth_attachment_format(
+ pipeline_descriptor,
+ vk_format_to_mtl_pixel_format(state->rp->depth_attachment_format));
+
+ if (state->rp->stencil_attachment_format != VK_FORMAT_UNDEFINED)
+ mtl_render_pipeline_descriptor_set_stencil_attachment_format(
+ pipeline_descriptor,
+ vk_format_to_mtl_pixel_format(state->rp->stencil_attachment_format));
+
+ if (has_static_depth_stencil_state(state)) {
+ bool has_depth =
+ state->rp->depth_attachment_format != VK_FORMAT_UNDEFINED;
+ bool has_stencil =
+ state->rp->stencil_attachment_format != VK_FORMAT_UNDEFINED;
+ vertex_shader->pipeline.gfx.mtl_depth_stencil_state_handle =
+ kk_compile_depth_stencil_state(device, state->ds, has_depth,
+ has_stencil);
+ }
+
+ if (state->rp->view_mask) {
+ uint32_t max_amplification = util_bitcount(state->rp->view_mask);
+ mtl_render_pipeline_descriptor_set_max_vertex_amplification_count(
+ pipeline_descriptor, max_amplification);
+ }
+
+ if (state->ms) {
+ mtl_render_pipeline_descriptor_set_raster_sample_count(
+ pipeline_descriptor, state->ms->rasterization_samples);
+ mtl_render_pipeline_descriptor_set_alpha_to_coverage(
+ pipeline_descriptor, state->ms->alpha_to_coverage_enable);
+ mtl_render_pipeline_descriptor_set_alpha_to_one(
+ pipeline_descriptor, state->ms->alpha_to_one_enable);
+ }
+
+ vertex_shader->pipeline.gfx.handle =
+ mtl_new_render_pipeline(device->mtl_handle, pipeline_descriptor);
+ if (vertex_shader->pipeline.gfx.handle == NULL)
+ result = VK_ERROR_INVALID_SHADER_NV;
+ vertex_shader->pipeline.gfx.primitive_type =
+ vk_primitive_topology_to_mtl_primitive_type(
+ state->ia->primitive_topology);
+
+ mtl_release(pipeline_descriptor);
+ mtl_release(fragment_function);
+ mtl_release(fragment_library);
+destroy_vertex:
+ mtl_release(vertex_function);
+ mtl_release(vertex_library);
+
+ return result;
+}
+
+static VkResult
+kk_compile_shaders(struct vk_device *device, uint32_t shader_count,
+ struct vk_shader_compile_info *infos,
+ const struct vk_graphics_pipeline_state *state,
+ const struct vk_features *enabled_features,
+ const VkAllocationCallbacks *pAllocator,
+ struct vk_shader **shaders_out)
+{
+ VkResult result = VK_SUCCESS;
+ struct kk_device *dev = container_of(device, struct kk_device, vk);
+
+ /* Vulkan doesn't require a fragment shader to build a pipeline. We may need
+ * to create one ourselves. */
+ nir_shader *null_fs = NULL;
+ nir_shader *shaders[shader_count + 1u];
+
+ /* Lower shaders, notably lowering IO. This is a prerequisite for inter-shader
+ * optimization. */
+ for (uint32_t i = 0u; i < shader_count; ++i) {
+ const struct vk_shader_compile_info *info = &infos[i];
+ nir_shader *nir = info->nir;
+
+ kk_lower_nir(dev, nir, info->robustness, info->set_layout_count,
+ info->set_layouts, state);
+
+ shaders[i] = nir;
+ }
+
+ /* Since we don't support GPL or shader objects and Metal render pipelines
+ * require both a vertex and a fragment shader, we may need to provide a
+ * pass-through fragment shader. */
+ if (state &&
+ shaders[shader_count - 1u]->info.stage != MESA_SHADER_FRAGMENT) {
+ null_fs = get_empty_nir(dev, MESA_SHADER_FRAGMENT, state);
+ shaders[shader_count] = null_fs;
+ }
+
+ uint32_t total_shaders = null_fs ? shader_count + 1 : shader_count;
+ nir_opt_varyings_bulk(shaders, total_shaders, true, UINT32_MAX, UINT32_MAX,
+ nir_opts);
+ /* A second pass is required because some dEQP-VK.glsl.matrix.sub.dynamic.*
+ * tests would otherwise fail: the vertex shader outputs a vec4 while the
+ * fragment shader reads a vec3, and in reality only a vec3 is needed. */
+ nir_opt_varyings_bulk(shaders, total_shaders, true, UINT32_MAX, UINT32_MAX,
+ nir_opts);
+
+ for (uint32_t i = 0; i < shader_count; i++) {
+ result =
+ kk_compile_shader(dev, &infos[i], state, pAllocator, &shaders_out[i]);
+ if (result != VK_SUCCESS) {
+ /* Clean up all the shaders before this point */
+ for (uint32_t j = 0; j < i; j++)
+ kk_shader_destroy(&dev->vk, shaders_out[j], pAllocator);
+
+ /* Clean up all the NIR after this point */
+ for (uint32_t j = i + 1; j < shader_count; j++)
+ ralloc_free(shaders[j]);
+
+ if (null_fs)
+ ralloc_free(null_fs);
+
+ /* Memset the output array */
+ memset(shaders_out, 0, shader_count * sizeof(*shaders_out));
+
+ return result;
+ }
+ }
+
+ /* Compile pipeline:
+ * 1. Compute pipeline
+ * 2. Graphics with all stages (since we don't support GPL or shader
+ * objects for now). This will be addressed later.
+ */
+ if (shaders_out[0]->stage == MESA_SHADER_COMPUTE) {
+ result = kk_compile_compute_pipeline(
+ dev, container_of(shaders_out[0], struct kk_shader, vk));
+ } else {
+ struct kk_shader *vs = container_of(shaders_out[0], struct kk_shader, vk);
+ struct kk_shader *fs =
+ container_of(shaders_out[shader_count - 1u], struct kk_shader, vk);
+ if (null_fs) {
+ struct vk_shader_compile_info info = {
+ .stage = MESA_SHADER_FRAGMENT,
+ .nir = null_fs,
+ .robustness = &rs_none,
+ };
+ struct vk_shader *frag_shader;
+ result =
+ kk_compile_shader(dev, &info, state, &dev->vk.alloc, &frag_shader);
+
+ if (result != VK_SUCCESS) {
+ for (uint32_t i = 0; i < shader_count; i++)
+ kk_shader_destroy(&dev->vk, shaders_out[i], pAllocator);
+
+ /* Memset the output array */
+ memset(shaders_out, 0, shader_count * sizeof(*shaders_out));
+
+ return result;
+ }
+ fs = container_of(frag_shader, struct kk_shader, vk);
+ }
+
+ result = kk_compile_graphics_pipeline(dev, vs, fs, state);
+
+ if (null_fs)
+ kk_shader_destroy(&dev->vk, &fs->vk, pAllocator);
+ }
+
+ return result;
+}
+
+static bool
+kk_shader_serialize(struct vk_device *vk_dev, const struct vk_shader *vk_shader,
+ struct blob *blob)
+{
+ struct kk_shader *shader = container_of(vk_shader, struct kk_shader, vk);
+
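+ /* Blob layout: shader info, entrypoint-name length, MSL code length, then the
+ * two strings and the pipeline handles. The lengths are written first so the
+ * deserializer can validate them before allocating. */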
+ blob_write_bytes(blob, &shader->info, sizeof(shader->info));
+ uint32_t entrypoint_length = strlen(shader->entrypoint_name) + 1;
+ blob_write_bytes(blob, &entrypoint_length, sizeof(entrypoint_length));
+ uint32_t code_length = strlen(shader->msl_code) + 1;
+ blob_write_bytes(blob, &code_length, sizeof(code_length));
+ blob_write_bytes(blob, shader->entrypoint_name, entrypoint_length);
+ blob_write_bytes(blob, shader->msl_code, code_length);
+ blob_write_bytes(blob, &shader->pipeline, sizeof(shader->pipeline));
+
+ /* We are building a new shader into the cache, so we need to retain its
+ * resources. */
+ if (shader->info.stage == MESA_SHADER_COMPUTE)
+ mtl_retain(shader->pipeline.cs);
+ else if (shader->info.stage == MESA_SHADER_VERTEX) {
+ mtl_retain(shader->pipeline.gfx.handle);
+ if (shader->pipeline.gfx.mtl_depth_stencil_state_handle)
+ mtl_retain(shader->pipeline.gfx.mtl_depth_stencil_state_handle);
+ }
+
+ return !blob->out_of_memory;
+}
+
+static VkResult
+kk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob,
+ uint32_t binary_version,
+ const VkAllocationCallbacks *pAllocator,
+ struct vk_shader **shader_out)
+{
+ struct kk_device *dev = container_of(vk_dev, struct kk_device, vk);
+ struct kk_shader *shader;
+
+ struct kk_shader_info info;
+ blob_copy_bytes(blob, &info, sizeof(info));
+
+ const uint32_t entrypoint_length = blob_read_uint32(blob);
+ const uint32_t code_length = blob_read_uint32(blob);
+ if (blob->overrun)
+ return vk_error(dev, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
+
+ shader = vk_shader_zalloc(&dev->vk, &kk_shader_ops, info.stage, pAllocator,
+ sizeof(*shader));
+ if (shader == NULL)
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ shader->info = info;
+
+ shader->entrypoint_name = ralloc_array(NULL, char, entrypoint_length);
+ if (shader->entrypoint_name == NULL) {
+ kk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ shader->msl_code = ralloc_array(NULL, char, code_length);
+ if (shader->msl_code == NULL) {
+ kk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ blob_copy_bytes(blob, (void *)shader->entrypoint_name, entrypoint_length);
+ blob_copy_bytes(blob, (void *)shader->msl_code, code_length);
+ blob_copy_bytes(blob, &shader->pipeline, sizeof(shader->pipeline));
+ if (blob->overrun) {
+ kk_shader_destroy(&dev->vk, &shader->vk, pAllocator);
+ return vk_error(dev, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);
+ }
+
+ /* We are building a new shader, so we need to retain its resources */
+ if (info.stage == MESA_SHADER_COMPUTE)
+ mtl_retain(shader->pipeline.cs);
+ else if (info.stage == MESA_SHADER_VERTEX) {
+ mtl_retain(shader->pipeline.gfx.handle);
+ if (shader->pipeline.gfx.mtl_depth_stencil_state_handle)
+ mtl_retain(shader->pipeline.gfx.mtl_depth_stencil_state_handle);
+ }
+
+ *shader_out = &shader->vk;
+
+ return VK_SUCCESS;
+}
+
+static void
+kk_cmd_bind_compute_shader(struct kk_cmd_buffer *cmd, struct kk_shader *shader)
+{
+ cmd->state.cs.pipeline_state = shader->pipeline.cs;
+ cmd->state.cs.dirty |= KK_DIRTY_PIPELINE;
+ cmd->state.cs.local_size = shader->info.cs.local_size;
+}
+
+static void
+kk_cmd_bind_graphics_shader(struct kk_cmd_buffer *cmd,
+ const mesa_shader_stage stage,
+ struct kk_shader *shader)
+{
+ /* Relevant pipeline data is only stored in vertex shaders */
+ if (stage != MESA_SHADER_VERTEX)
+ return;
+
+ cmd->state.gfx.primitive_type = shader->pipeline.gfx.primitive_type;
+ cmd->state.gfx.pipeline_state = shader->pipeline.gfx.handle;
+ cmd->state.gfx.vb.attribs_read = shader->info.vs.attribs_read;
+
+ bool requires_dynamic_depth_stencil =
+ shader->pipeline.gfx.mtl_depth_stencil_state_handle == NULL;
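+ /* A NULL static handle means the pipeline was built without static
+ * depth/stencil state, so it must be provided through dynamic state. */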
+ if (cmd->state.gfx.is_depth_stencil_dynamic) {
+ /* If we are switching from dynamic to static, we need to clean up
+ * temporary state. Otherwise, leave the existing dynamic state
+ * untouched.
+ */
+ if (!requires_dynamic_depth_stencil) {
+ mtl_release(cmd->state.gfx.depth_stencil_state);
+ cmd->state.gfx.depth_stencil_state =
+ shader->pipeline.gfx.mtl_depth_stencil_state_handle;
+ }
+ } else
+ cmd->state.gfx.depth_stencil_state =
+ shader->pipeline.gfx.mtl_depth_stencil_state_handle;
+ cmd->state.gfx.is_depth_stencil_dynamic = requires_dynamic_depth_stencil;
+ cmd->state.gfx.dirty |= KK_DIRTY_PIPELINE;
+}
+
+static void
+kk_cmd_bind_shaders(struct vk_command_buffer *cmd_buffer, uint32_t stage_count,
+ const mesa_shader_stage *stages,
+ struct vk_shader **const shaders)
+{
+ struct kk_cmd_buffer *cmd =
+ container_of(cmd_buffer, struct kk_cmd_buffer, vk);
+
+ for (uint32_t i = 0; i < stage_count; i++) {
+ struct kk_shader *shader = container_of(shaders[i], struct kk_shader, vk);
+
+ if (stages[i] == MESA_SHADER_COMPUTE || stages[i] == MESA_SHADER_KERNEL)
+ kk_cmd_bind_compute_shader(cmd, shader);
+ else
+ kk_cmd_bind_graphics_shader(cmd, stages[i], shader);
+ }
+}
+
+static VkResult
+kk_shader_get_executable_properties(
+ UNUSED struct vk_device *device, const struct vk_shader *vk_shader,
+ uint32_t *executable_count, VkPipelineExecutablePropertiesKHR *properties)
+{
+ VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out, properties,
+ executable_count);
+
+ return vk_outarray_status(&out);
+}
+
+static VkResult
+kk_shader_get_executable_statistics(
+ UNUSED struct vk_device *device, const struct vk_shader *vk_shader,
+ uint32_t executable_index, uint32_t *statistic_count,
+ VkPipelineExecutableStatisticKHR *statistics)
+{
+ /* TODO_KOSMICKRISP */
+ return VK_SUCCESS;
+}
+
+static VkResult
+kk_shader_get_executable_internal_representations(
+ UNUSED struct vk_device *device, const struct vk_shader *vk_shader,
+ uint32_t executable_index, uint32_t *internal_representation_count,
+ VkPipelineExecutableInternalRepresentationKHR *internal_representations)
+{
+ /* TODO_KOSMICKRISP */
+ return VK_SUCCESS;
+}
+
+static const struct vk_shader_ops kk_shader_ops = {
+ .destroy = kk_shader_destroy,
+ .serialize = kk_shader_serialize,
+ .get_executable_properties = kk_shader_get_executable_properties,
+ .get_executable_statistics = kk_shader_get_executable_statistics,
+ .get_executable_internal_representations =
+ kk_shader_get_executable_internal_representations,
+};
+
+const struct vk_device_shader_ops kk_device_shader_ops = {
+ .get_nir_options = kk_get_nir_options,
+ .get_spirv_options = kk_get_spirv_options,
+ .preprocess_nir = kk_preprocess_nir,
+ .hash_state = kk_hash_graphics_state,
+ .compile =
+ kk_compile_shaders, /* Generates the MSL source and builds the Metal
+ library and pipeline state objects */
+ .deserialize = kk_deserialize_shader,
+ .cmd_set_dynamic_graphics_state = vk_cmd_set_dynamic_graphics_state,
+ .cmd_bind_shaders = kk_cmd_bind_shaders,
+};
\ No newline at end of file
diff --git a/src/kosmickrisp/vulkan/kk_shader.h b/src/kosmickrisp/vulkan/kk_shader.h
new file mode 100644
index 00000000000..6015d81815b
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_shader.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_SHADER_H
+#define KK_SHADER_H 1
+
+#include "kk_device_memory.h"
+#include "kk_private.h"
+
+#include "vk_pipeline_cache.h"
+
+#include "vk_shader.h"
+
+struct kk_shader_info {
+ mesa_shader_stage stage;
+ union {
+ struct {
+ uint32_t attribs_read;
+ } vs;
+
+ struct {
+ struct mtl_size local_size;
+ } cs;
+ };
+};
+
+struct kk_shader {
+ struct vk_shader vk;
+ const char *entrypoint_name;
+ const char *msl_code;
+
+ struct kk_shader_info info;
+
+ /* Pipeline resources. Only stored in compute or vertex shaders */
+ struct {
+ union {
+ struct {
+ mtl_render_pipeline_state *handle;
+ mtl_depth_stencil_state *mtl_depth_stencil_state_handle;
+ enum mtl_primitive_type primitive_type;
+ } gfx;
+ mtl_compute_pipeline_state *cs;
+ };
+ } pipeline;
+};
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(kk_shader, vk.base, VkShaderEXT,
+ VK_OBJECT_TYPE_SHADER_EXT);
+
+extern const struct vk_device_shader_ops kk_device_shader_ops;
+
+static inline nir_address_format
+kk_buffer_addr_format(VkPipelineRobustnessBufferBehaviorEXT robustness)
+{
+ switch (robustness) {
+ case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT:
+ return nir_address_format_64bit_global_32bit_offset;
+ case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT:
+ case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT:
+ return nir_address_format_64bit_bounded_global;
+ default:
+ UNREACHABLE("Invalid robust buffer access behavior");
+ }
+}
+
+bool
+kk_nir_lower_descriptors(nir_shader *nir,
+ const struct vk_pipeline_robustness_state *rs,
+ uint32_t set_layout_count,
+ struct vk_descriptor_set_layout *const *set_layouts);
+
+bool kk_nir_lower_textures(nir_shader *nir);
+
+bool kk_nir_lower_vs_multiview(nir_shader *nir, uint32_t view_mask);
+bool kk_nir_lower_fs_multiview(nir_shader *nir, uint32_t view_mask);
+
+VkResult kk_compile_nir_shader(struct kk_device *dev, nir_shader *nir,
+ const VkAllocationCallbacks *alloc,
+ struct kk_shader **shader_out);
+
+#endif /* KK_SHADER_H */
diff --git a/src/kosmickrisp/vulkan/kk_sync.c b/src/kosmickrisp/vulkan/kk_sync.c
new file mode 100644
index 00000000000..f44200c379e
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_sync.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_sync.h"
+
+#include "kk_device.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+
+static VkResult
+kk_timeline_init(struct vk_device *device, struct vk_sync *sync,
+ uint64_t initial_value)
+{
+ struct kk_sync_timeline *timeline =
+ container_of(sync, struct kk_sync_timeline, base);
+
+ struct kk_device *dev = container_of(device, struct kk_device, vk);
+ timeline->mtl_handle = mtl_new_shared_event(dev->mtl_handle);
+ mtl_shared_event_set_signaled_value(timeline->mtl_handle, initial_value);
+
+ return VK_SUCCESS;
+}
+
+static void
+kk_timeline_finish(struct vk_device *device, struct vk_sync *sync)
+{
+ struct kk_sync_timeline *timeline =
+ container_of(sync, struct kk_sync_timeline, base);
+ mtl_release(timeline->mtl_handle);
+}
+
+static VkResult
+kk_timeline_signal(struct vk_device *device, struct vk_sync *sync,
+ uint64_t value)
+{
+ struct kk_sync_timeline *timeline =
+ container_of(sync, struct kk_sync_timeline, base);
+ mtl_shared_event_set_signaled_value(timeline->mtl_handle, value);
+ return VK_SUCCESS;
+}
+
+static VkResult
+kk_timeline_get_value(struct vk_device *device, struct vk_sync *sync,
+ uint64_t *value)
+{
+ struct kk_sync_timeline *timeline =
+ container_of(sync, struct kk_sync_timeline, base);
+ *value = mtl_shared_event_get_signaled_value(timeline->mtl_handle);
+ return VK_SUCCESS;
+}
+
+static VkResult
+kk_timeline_wait(struct vk_device *device, struct vk_sync *sync,
+ uint64_t wait_value, enum vk_sync_wait_flags wait_flags,
+ uint64_t abs_timeout_ns)
+{
+ struct kk_sync_timeline *timeline =
+ container_of(sync, struct kk_sync_timeline, base);
+
+ /* abs_timeout_ns is the absolute point in time at which we should stop
+ * waiting, not a relative timeout duration. Therefore, we need to compute the
+ * delta from now until that deadline and convert it to ms for Metal to be
+ * happy (similar to what dzn does).
+ */
+ uint64_t timeout_ms = 0u;
+ if (abs_timeout_ns == OS_TIMEOUT_INFINITE) {
+ timeout_ms = OS_TIMEOUT_INFINITE;
+ } else {
+ uint64_t cur_time = os_time_get_nano();
+ uint64_t rel_timeout_ns =
+ abs_timeout_ns > cur_time ? abs_timeout_ns - cur_time : 0;
+
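+ /* Round up so a sub-millisecond remainder still yields a non-zero wait. */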
+ timeout_ms =
+ (rel_timeout_ns / 1000000) + (rel_timeout_ns % 1000000 ? 1 : 0);
+ }
+ int completed = mtl_shared_event_wait_until_signaled_value(
+ timeline->mtl_handle, wait_value, timeout_ms);
+
+ return completed != 0 ? VK_SUCCESS : VK_TIMEOUT;
+}
+
+const struct vk_sync_type kk_sync_type = {
+ .size = sizeof(struct kk_sync_timeline),
+ .features = VK_SYNC_FEATURE_TIMELINE | VK_SYNC_FEATURE_GPU_WAIT |
+ VK_SYNC_FEATURE_CPU_WAIT | VK_SYNC_FEATURE_CPU_SIGNAL |
+ VK_SYNC_FEATURE_WAIT_PENDING |
+ VK_SYNC_FEATURE_WAIT_BEFORE_SIGNAL,
+ .init = kk_timeline_init,
+ .finish = kk_timeline_finish,
+ .signal = kk_timeline_signal,
+ .get_value = kk_timeline_get_value,
+ .reset = NULL,
+ .move = NULL,
+ .wait = kk_timeline_wait,
+ .wait_many = NULL,
+ .import_opaque_fd = NULL,
+ .export_opaque_fd = NULL,
+ .import_sync_file = NULL,
+ .export_sync_file = NULL,
+ .import_win32_handle = NULL,
+ .export_win32_handle = NULL,
+ .set_win32_export_params = NULL,
+};
diff --git a/src/kosmickrisp/vulkan/kk_sync.h b/src/kosmickrisp/vulkan/kk_sync.h
new file mode 100644
index 00000000000..da4f149918d
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_sync.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_SYNC_TYPES_H
+#define KK_SYNC_TYPES_H 1
+
+#include "kosmickrisp/bridge/mtl_types.h"
+
+#include "vk_sync.h"
+
+struct kk_queue;
+
+struct kk_sync_timeline {
+ struct vk_sync base;
+ mtl_shared_event *mtl_handle;
+};
+
+extern const struct vk_sync_type kk_sync_type;
+
+#endif
diff --git a/src/kosmickrisp/vulkan/kk_wsi.c b/src/kosmickrisp/vulkan/kk_wsi.c
new file mode 100644
index 00000000000..5912e151ef7
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_wsi.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_wsi.h"
+#include "kk_cmd_buffer.h"
+#include "kk_device.h"
+#include "kk_dispatch_trampolines.h"
+#include "kk_image.h"
+#include "kk_instance.h"
+#include "wsi_common.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+
+static PFN_vkVoidFunction
+kk_instance_get_proc_addr_unchecked(const struct vk_instance *instance,
+ const char *name)
+{
+ PFN_vkVoidFunction func;
+
+ if (instance == NULL || name == NULL)
+ return NULL;
+
+ func = vk_instance_dispatch_table_get(&instance->dispatch_table, name);
+ if (func != NULL)
+ return func;
+
+ func = vk_physical_device_dispatch_table_get(&kk_physical_device_trampolines,
+ name);
+ if (func != NULL)
+ return func;
+
+ func = vk_device_dispatch_table_get(&kk_device_trampolines, name);
+ if (func != NULL)
+ return func;
+
+ return NULL;
+}
+
+static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+kk_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
+{
+ VK_FROM_HANDLE(kk_physical_device, pdev, physicalDevice);
+ return kk_instance_get_proc_addr_unchecked(pdev->vk.instance, pName);
+}
+
+static VkResult
+kk_bind_drawable_to_vkimage(VkImage vk_image, void *drawable)
+{
+ VK_FROM_HANDLE(kk_image, image, vk_image);
+ mtl_texture *texture = mtl_drawable_get_texture(drawable);
+
+ /* This should only be called for swapchain binding. */
+ assert(image->plane_count == 1);
+ struct kk_image_plane *plane = &image->planes[0];
+ if (plane->mtl_handle)
+ mtl_release(plane->mtl_handle);
+ if (plane->mtl_handle_array)
+ mtl_release(plane->mtl_handle_array);
+ plane->mtl_handle = mtl_retain(texture);
+ plane->mtl_handle_array = NULL;
+ plane->addr = mtl_texture_get_gpu_resource_id(texture);
+
+ return VK_SUCCESS;
+}
+
+static void
+kk_encode_drawable_present(VkCommandBuffer vk_cmd, void *drawable)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, vk_cmd);
+ mtl_retain(drawable);
+ cmd->drawable = drawable;
+}
+
+static struct vk_queue *
+kk_get_blit_queue(VkDevice device)
+{
+ /* We only have one queue, so just return that one. */
+ VK_FROM_HANDLE(kk_device, dev, device);
+ return &dev->queue.vk;
+}
+
+VkResult
+kk_init_wsi(struct kk_physical_device *pdev)
+{
+ struct wsi_device_options wsi_options = {.sw_device = false};
+ struct wsi_device *wsi = &pdev->wsi_device;
+ VkResult result =
+ wsi_device_init(wsi, kk_physical_device_to_handle(pdev), kk_wsi_proc_addr,
+ &pdev->vk.instance->alloc,
+ 0u, // Not relevant for Metal WSI
+ NULL, // Not relevant for Metal
+ &wsi_options);
+ if (result != VK_SUCCESS)
+ return result;
+
+ wsi->metal.bind_drawable_to_vkimage = kk_bind_drawable_to_vkimage;
+ wsi->metal.encode_drawable_present = kk_encode_drawable_present;
+ wsi->get_blit_queue = kk_get_blit_queue;
+
+ pdev->vk.wsi_device = wsi;
+
+ return result;
+}
+
+void
+kk_finish_wsi(struct kk_physical_device *pdev)
+{
+ pdev->vk.wsi_device = NULL;
+ wsi_device_finish(&pdev->wsi_device, &pdev->vk.instance->alloc);
+}
diff --git a/src/kosmickrisp/vulkan/kk_wsi.h b/src/kosmickrisp/vulkan/kk_wsi.h
new file mode 100644
index 00000000000..0a9fbd214b3
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_wsi.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_WSI_H
+#define KK_WSI_H 1
+
+#include "kk_physical_device.h"
+
+VkResult kk_init_wsi(struct kk_physical_device *pdev);
+void kk_finish_wsi(struct kk_physical_device *pdev);
+
+#endif /* KK_WSI_H */
diff --git a/src/kosmickrisp/vulkan/meson.build b/src/kosmickrisp/vulkan/meson.build
new file mode 100644
index 00000000000..41cfa8a6fec
--- /dev/null
+++ b/src/kosmickrisp/vulkan/meson.build
@@ -0,0 +1,211 @@
+# Copyright 2025 LunarG, Inc.
+# Copyright 2025 Google LLC
+# SPDX-License-Identifier: MIT
+
+kk_device_dispatch_table_dependencies = declare_dependency(
+ sources : [
+ kk_dispatch_trampolines[1],
+ ],
+)
+
+# Possibly more than strictly needed; kept to make sure we don't miss any dependencies
+kk_device_dispatch_table_dependencies_list = [
+ vulkan_lite_runtime_deps,
+ kk_device_dispatch_table_dependencies,
+ idep_nir,
+ idep_mesautil,
+ idep_vulkan_runtime,
+ idep_vulkan_util,
+ idep_vulkan_wsi,
+ idep_vulkan_wsi_headers,
+]
+
+libkk_device_dispatch_table = static_library(
+ 'kk_device_dispatch_table',
+ kk_dispatch_trampolines,
+ include_directories : [inc_include, inc_src],
+ dependencies : kk_device_dispatch_table_dependencies_list,
+ c_args : c_msvc_compat_args,
+ gnu_symbol_visibility : 'hidden',
+ build_by_default : false,
+)
+
+idep_kk_device_dispatch_table = declare_dependency(
+ # Instruct users of this library to link with --whole-archive. Otherwise,
+ # our weak function overloads may not resolve properly.
+ link_whole : [libkk_device_dispatch_table]
+)
+
+kk_files = files(
+ 'kk_bo.h',
+ 'kk_bo.c',
+ 'kk_buffer_view.h',
+ 'kk_buffer_view.c',
+ 'kk_buffer.h',
+ 'kk_buffer.c',
+ 'kk_cmd_buffer.h',
+ 'kk_cmd_buffer.c',
+ 'kk_cmd_clear.c',
+ 'kk_cmd_copy.c',
+ 'kk_cmd_dispatch.c',
+ 'kk_cmd_draw.c',
+ 'kk_cmd_meta.c',
+ 'kk_cmd_pool.h',
+ 'kk_cmd_pool.c',
+ 'kk_debug.h',
+ 'kk_debug.c',
+ 'kk_descriptor_set_layout.h',
+ 'kk_descriptor_set_layout.c',
+ 'kk_descriptor_set.h',
+ 'kk_descriptor_set.c',
+ 'kk_device_lib.c',
+ 'kk_device_memory.h',
+ 'kk_device_memory.c',
+ 'kk_device.h',
+ 'kk_device.c',
+ 'kk_encoder.h',
+ 'kk_encoder.c',
+ 'kk_event.h',
+ 'kk_event.c',
+ 'kk_format.h',
+ 'kk_format.c',
+ 'kk_image_layout.h',
+ 'kk_image_layout.c',
+ 'kk_image_view.h',
+ 'kk_image_view.c',
+ 'kk_image.h',
+ 'kk_image.c',
+ 'kk_instance.h',
+ 'kk_instance.c',
+ 'kk_nir_lower_descriptors.c',
+ 'kk_nir_lower_multiview.c',
+ 'kk_nir_lower_textures.c',
+ 'kk_nir_lower_vbo.h',
+ 'kk_nir_lower_vbo.c',
+ 'kk_physical_device.h',
+ 'kk_physical_device.c',
+ 'kk_query_pool.h',
+ 'kk_query_pool.c',
+ 'kk_query_table.h',
+ 'kk_query_table.c',
+ 'kk_queue.h',
+ 'kk_queue.c',
+ 'kk_sampler.h',
+ 'kk_sampler.c',
+ 'kk_shader.h',
+ 'kk_shader.c',
+ 'kk_sync.h',
+ 'kk_sync.c',
+ 'kk_wsi.h',
+ 'kk_wsi.c',
+)
+
+kkcl_files = files(
+ 'cl/kk_query.cl',
+ 'cl/kk_triangle_fan.cl',
+)
+
+kk_entrypoints = custom_target(
+ 'kk_entrypoints',
+ input : [vk_entrypoints_gen, vk_api_xml],
+ output : ['kk_entrypoints.h', 'kk_entrypoints.c'],
+ command : [
+ prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak',
+ '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'kk',
+ '--beta', with_vulkan_beta.to_string()
+ ],
+ depend_files : vk_entrypoints_gen_depend_files,
+)
+
+relative_dir = fs.relative_to(meson.global_source_root(), meson.global_build_root())
+
+kkcl_spv = custom_target(
+ input : kkcl_files,
+ output : 'kkcl.spv',
+ command : [
+ prog_mesa_clc, '-o', '@OUTPUT@', '--depfile', '@DEPFILE@', kkcl_files, '--',
+ '-I' + join_paths(meson.project_source_root(), 'src/compiler/libcl'),
+ '-I' + join_paths(meson.current_source_dir(), '.'),
+ '-I' + join_paths(meson.project_source_root(), 'src'),
+ cl_args,
+ ],
+ depfile : 'libkk_shaders.h.d',
+)
+
+kkcl = custom_target(
+ input : kkcl_spv,
+ output : ['kkcl.cpp', 'kkcl.h'],
+ command : [prog_vtn_bindgen2, '@INPUT@', '@OUTPUT@'],
+)
+
+kk_deps = [
+ idep_nir,
+ idep_mesautil,
+ idep_vulkan_runtime,
+ idep_vulkan_util,
+ idep_vulkan_wsi,
+ idep_vulkan_wsi_headers,
+ kk_device_dispatch_table_dependencies,
+ idep_kk_device_dispatch_table,
+ idep_msl_to_nir,
+ idep_mtl_bridge
+]
+
+libkk = static_library(
+ 'kk',
+ [
+ kk_files,
+ kk_entrypoints,
+ kkcl,
+ sha1_h,
+ ],
+ include_directories : [
+ inc_include,
+ inc_src,
+ ],
+ dependencies : [kk_deps],
+ gnu_symbol_visibility : 'hidden',
+)
+
+# With the macOS ld linker, "-Wl,-undefined,dynamic_lookup" is required;
+# otherwise the linker complains about undefined symbols.
+libvulkan_kosmickrisp = shared_library(
+ 'vulkan_kosmickrisp',
+ link_whole: [libkk],
+ link_args: [ld_args_build_id, '-Wl,-undefined,dynamic_lookup'],
+ gnu_symbol_visibility : 'hidden',
+ install : true,
+)
+
+kosmickrisp_mesa_icd = custom_target(
+ 'kosmickrisp_mesa_icd',
+ input : [vk_icd_gen, vk_api_xml],
+ output : 'kosmickrisp_mesa_icd.@0@.json'.format(host_machine.cpu()),
+ command : [
+ prog_python, '@INPUT0@',
+ '--api-version', '1.3', '--xml', '@INPUT1@',
+ '--lib-path', get_option('prefix') / get_option('libdir') / 'libvulkan_kosmickrisp.dylib',
+ '--out', '@OUTPUT@',
+ ],
+ build_by_default : true,
+ install_dir : with_vulkan_icd_dir,
+ install_tag : 'runtime',
+ install : true,
+)
+
+kosmickrisp_icd = custom_target(
+ 'kosmickrisp_mesa_devenv_icd',
+ input : [vk_icd_gen, vk_api_xml],
+ output : 'kosmickrisp_mesa_devenv_icd.@0@.json'.format(host_machine.cpu()),
+ command : [
+ prog_python, '@INPUT0@',
+ '--api-version', '1.3', '--xml', '@INPUT1@',
+ '--lib-path', meson.current_build_dir() / 'libvulkan_kosmickrisp.dylib',
+ '--out', '@OUTPUT@',
+ ],
+ build_by_default : true,
+)
+
+devenv.append('VK_DRIVER_FILES', kosmickrisp_icd.full_path())
+# Deprecated: replaced by VK_DRIVER_FILES above
+devenv.append('VK_ICD_FILENAMES', kosmickrisp_icd.full_path())
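+# Example usage (illustrative only; the exact build path and CPU suffix depend
+# on the local setup): point the Vulkan loader at the devenv ICD without
+# installing, e.g.
+#   VK_DRIVER_FILES=<builddir>/src/kosmickrisp/vulkan/kosmickrisp_mesa_devenv_icd.<cpu>.json vulkaninfo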
diff --git a/src/meson.build b/src/meson.build
index d08e26466da..f27dae33631 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -114,6 +114,9 @@ endif
if with_gallium_asahi or with_asahi_vk or with_tools.contains('asahi')
subdir('asahi')
endif
+if with_kosmickrisp_vk
+ subdir('kosmickrisp')
+endif
if with_gallium
subdir('mesa')
endif