| author | Aitor Camacho <aitor@lunarg.com> | 2025-09-23 10:24:52 +0200 |
|---|---|---|
| committer | Marge Bot <marge-bot@fdo.invalid> | 2025-10-20 17:46:38 +0000 |
| commit | 7c268a1e918544ebe8669ecf7c78d34a9eaf92c1 | |
| tree | d186c7a9d2f5f4578b0ba50e5931e8e2c5b0eda5 | |
| parent | f6c7f16322de7088fef812890e30a697c4f2eaf2 | |
kk: Add KosmicKrisp
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37522>
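KosmicKrisp layers Vulkan on top of Metal, and the Vulkan driver talks to Metal only through the plain-C wrappers added under src/kosmickrisp/bridge/. The header comment in mtl_bridge.h below documents the convention: wrappers whose name contains "new" (and mtl_device_create) return retained objects that the caller must hand back to mtl_release(), while the remaining wrappers are thin member-function calls on an opaque handle. The following caller-side sketch is illustrative only and is not part of the patch; it strings together declarations that the series does add (the queue depth of 8 and the buffer-to-buffer copy are arbitrary example values):

```c
/* Hypothetical usage sketch of the bridge API added in this commit. */
#include <stddef.h>

#include "mtl_bridge.h"

static void
copy_buffer_example(mtl_buffer *src, mtl_buffer *dst, size_t size_B)
{
   /* Objects returned by mtl_device_create()/mtl_new_*() are retained and
    * must be released with mtl_release(). */
   mtl_device *dev = mtl_device_create();
   mtl_command_queue *queue = mtl_new_command_queue(dev, 8);
   mtl_command_buffer *cmd = mtl_new_command_buffer(queue);

   /* Encoders follow the same pattern: create, record, end encoding, release. */
   mtl_blit_encoder *blit = mtl_new_blit_command_encoder(cmd);
   mtl_copy_from_buffer_to_buffer(blit, src, 0, dst, 0, size_B);
   mtl_end_encoding(blit);
   mtl_release(blit);

   mtl_command_buffer_commit(cmd);

   mtl_release(cmd);
   mtl_release(queue);
   mtl_release(dev);
}
```

On Darwin hosts the bridge builds the Objective-C implementations; on other systems meson falls back to the stubs/ sources so the driver can still be compile-tested. The driver itself is enabled by adding kosmickrisp to Mesa's -Dvulkan-drivers meson option, which this commit extends.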
130 files changed, 24271 insertions, 2 deletions
diff --git a/meson.build b/meson.build
index ec978d02cdd..80917def523 100644
--- a/meson.build
+++ b/meson.build
@@ -271,7 +271,7 @@ elif _vulkan_drivers.contains('all')
   _vulkan_drivers = ['amd', 'intel', 'intel_hasvk', 'swrast', 'freedreno',
                      'panfrost', 'virtio', 'broadcom',
                      'imagination', 'microsoft-experimental',
-                     'nouveau', 'asahi', 'gfxstream']
+                     'nouveau', 'asahi', 'gfxstream', 'kosmickrisp']
 endif
 
 with_intel_vk = _vulkan_drivers.contains('intel')
@@ -288,6 +288,7 @@ with_microsoft_vk = _vulkan_drivers.contains('microsoft-experimental')
 with_nouveau_vk = _vulkan_drivers.contains('nouveau')
 with_asahi_vk = _vulkan_drivers.contains('asahi')
 with_gfxstream_vk = _vulkan_drivers.contains('gfxstream')
+with_kosmickrisp_vk = _vulkan_drivers.contains('kosmickrisp')
 with_any_vk = _vulkan_drivers.length() != 0
 
 with_llvm = with_llvm \
@@ -829,6 +830,7 @@ with_driver_using_cl = [
   with_gallium_asahi, with_asahi_vk, with_tools.contains('asahi'),
   with_gallium_panfrost, with_panfrost_vk,
   with_nouveau_vk, with_imagination_vk,
+  with_kosmickrisp_vk,
 ].contains(true)
 
 if get_option('mesa-clc') == 'system'
diff --git a/meson.options b/meson.options
index b1f98d7452a..75731475c12 100644
--- a/meson.options
+++ b/meson.options
@@ -209,7 +209,7 @@ option(
   choices : ['auto', 'amd', 'broadcom', 'freedreno', 'intel', 'intel_hasvk',
              'panfrost', 'swrast', 'virtio', 'imagination',
              'microsoft-experimental', 'nouveau', 'asahi', 'gfxstream',
-             'all'],
+             'kosmickrisp', 'all'],
   description : 'List of vulkan drivers to build. If this is set to auto ' +
                 'all drivers applicable to the target OS/architecture ' +
                 'will be built'
diff --git a/src/kosmickrisp/.clang-format b/src/kosmickrisp/.clang-format
new file mode 100644
index 00000000000..91bc46f7dbf
--- /dev/null
+++ b/src/kosmickrisp/.clang-format
@@ -0,0 +1,7 @@
+BasedOnStyle: InheritParentConfig
+DisableFormat: false
+
+AlignConsecutiveBitFields: true
+ColumnLimit: 80
+BreakStringLiterals: false
+SpaceBeforeParens: ControlStatementsExceptControlMacros
diff --git a/src/kosmickrisp/bridge/meson.build b/src/kosmickrisp/bridge/meson.build
new file mode 100644
index 00000000000..b2f5306bd9b
--- /dev/null
+++ b/src/kosmickrisp/bridge/meson.build
@@ -0,0 +1,61 @@
+# Copyright 2025 LunarG, Inc.
+# Copyright 2025 Google LLC
+# SPDX-License-Identifier: MIT
+
+mtl_bridge_files = files(
+  'vk_to_mtl_map.h',
+  'vk_to_mtl_map.c',
+  'mtl_format.h',
+)
+
+if host_machine.system() == 'darwin'
+  mtl_bridge_files += files(
+    'mtl_bridge.m',
+    'mtl_buffer.m',
+    'mtl_command_buffer.m',
+    'mtl_command_queue.m',
+    'mtl_compute_state.m',
+    'mtl_device.m',
+    'mtl_encoder.m',
+    'mtl_heap.m',
+    'mtl_library.m',
+    'mtl_render_state.m',
+    'mtl_sampler.m',
+    'mtl_sync.m',
+    'mtl_texture.m',
+  )
+else
+  mtl_bridge_files += files(
+    'stubs/mtl_bridge.c',
+    'stubs/mtl_buffer.c',
+    'stubs/mtl_command_buffer.c',
+    'stubs/mtl_command_queue.c',
+    'stubs/mtl_compute_state.c',
+    'stubs/mtl_device.c',
+    'stubs/mtl_encoder.c',
+    'stubs/mtl_heap.c',
+    'stubs/mtl_library.c',
+    'stubs/mtl_render_state.c',
+    'stubs/mtl_sampler.c',
+    'stubs/mtl_sync.c',
+    'stubs/mtl_texture.c',
+  )
+endif
+
+mtl_bridge_dependencies = [
+  idep_vulkan_lite_runtime_headers,
+  idep_vulkan_util_headers
+]
+
+libmtl_bridge = static_library(
+  'mtl_bridge',
+  [mtl_bridge_files],
+  include_directories : [include_directories('../vulkan/'), inc_include, inc_src],
+  dependencies : mtl_bridge_dependencies,
+  gnu_symbol_visibility: 'hidden',
+  build_by_default: false,
+)
+
+idep_mtl_bridge = declare_dependency(
+  link_with : libmtl_bridge,
+)
diff --git a/src/kosmickrisp/bridge/mtl_bridge.h b/src/kosmickrisp/bridge/mtl_bridge.h
new file mode 100644
index 00000000000..bcb8245d42c
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_bridge.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_BRIDGE_H
+#define KK_BRIDGE_H 1
+
+/* C wrappers for Metal. May not be complete; if you find something you need,
+ * feel free to add it where it belongs. As a rule of thumb, member functions
+ * go in the object's .h/.m/.c. Naming convention for wrappers is:
+ * object_type* mtl_new_object_type(params...);
+ * void mtl_member_function(object_type* ptr, params...);
+ * void mtl_object_set_member(object_type* ptr, member_type value);
+ * member_type mtl_object_get_member(object_type* ptr);
+ *
+ * Functions with "new" in the name return an object the caller must release
+ * via mtl_release(object).
+ */
+
+#include "mtl_types.h"
+
+#include "mtl_buffer.h"
+#include "mtl_command_buffer.h"
+#include "mtl_command_queue.h"
+#include "mtl_compute_state.h"
+#include "mtl_device.h"
+#include "mtl_encoder.h"
+#include "mtl_format.h"
+#include "mtl_heap.h"
+#include "mtl_library.h"
+#include "mtl_render_state.h"
+#include "mtl_sampler.h"
+#include "mtl_sync.h"
+#include "mtl_texture.h"
+
+mtl_texture *mtl_drawable_get_texture(void *drawable_ptr);
+
+void *mtl_retain(void *handle);
+void mtl_release(void *handle);
+
+#endif /* KK_BRIDGE_H */
diff --git a/src/kosmickrisp/bridge/mtl_bridge.m b/src/kosmickrisp/bridge/mtl_bridge.m
new file mode 100644
index 00000000000..a5ef514f630
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_bridge.m
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_bridge.h"
+
+// kk_image_layout.h should already include "vulkan/vulkan.h", but just to be safe
+#include "vulkan/vulkan.h"
+#include "kk_image_layout.h"
+
+#include "util/macros.h"
+
+#include <Metal/MTLCommandBuffer.h>
+#include <Metal/MTLCommandQueue.h>
+#include <Metal/MTLDevice.h>
+#include <Metal/MTLHeap.h>
+#include <Metal/MTLEvent.h>
+
+#include <QuartzCore/CAMetalLayer.h>
+
+static_assert(sizeof(MTLResourceID) == sizeof(uint64_t), "Must match, otherwise descriptors are broken");
+
+mtl_texture *
+mtl_drawable_get_texture(void *drawable_ptr)
+{
+   @autoreleasepool {
+      id<CAMetalDrawable> drawable = (id<CAMetalDrawable>)drawable_ptr;
+      return drawable.texture;
+   }
+}
+
+void *
+mtl_retain(void *handle)
+{
+   @autoreleasepool {
+      NSObject *obj = (NSObject *)handle;
+      return [obj retain];
+   }
+}
+
+void
+mtl_release(void *handle)
+{
+   @autoreleasepool {
+      NSObject *obj = (NSObject *)handle;
+      [obj release];
+   }
+}
diff --git a/src/kosmickrisp/bridge/mtl_buffer.h b/src/kosmickrisp/bridge/mtl_buffer.h
new file mode 100644
index 00000000000..fc8cd21e5fa
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_buffer.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef MTL_BUFFER_H
+#define MTL_BUFFER_H 1
+
+#include "mtl_types.h"
+
+#include <inttypes.h>
+
+struct kk_image_layout;
+
+/* Utils */
+uint64_t mtl_buffer_get_length(mtl_buffer *buffer);
+uint64_t mtl_buffer_get_gpu_address(mtl_buffer *buffer);
+/* Gets CPU address */
+void *mtl_get_contents(mtl_buffer *buffer);
+
+/* Allocation from buffer */
+mtl_texture *mtl_new_texture_with_descriptor_linear(
+   mtl_buffer *buffer, const struct kk_image_layout *layout, uint64_t offset);
+
+#endif /* MTL_BUFFER_H */
\ No newline at end of file
diff --git a/src/kosmickrisp/bridge/mtl_buffer.m b/src/kosmickrisp/bridge/mtl_buffer.m
new file mode 100644
index 00000000000..dedb309e7fd
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_buffer.m
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_buffer.h"
+
+/* TODO_KOSMICKRISP Remove */
+#include "kk_image_layout.h"
+
+#include <Metal/MTLBuffer.h>
+#include <Metal/MTLTexture.h>
+
+uint64_t
+mtl_buffer_get_length(mtl_buffer *buffer)
+{
+   @autoreleasepool {
+      id<MTLBuffer> buf = (id<MTLBuffer>)buffer;
+      return buf.length;
+   }
+}
+
+uint64_t
+mtl_buffer_get_gpu_address(mtl_buffer *buffer)
+{
+   @autoreleasepool {
+      id<MTLBuffer> buf = (id<MTLBuffer>)buffer;
+      return [buf gpuAddress];
+   }
+}
+
+void *
+mtl_get_contents(mtl_buffer *buffer)
+{
+   @autoreleasepool {
+      id<MTLBuffer> buf = (id<MTLBuffer>)buffer;
+      return [buf contents];
+   }
+}
+
+/* TODO_KOSMICKRISP This is a duplicate, but both should be removed once we move kk_image_layout to the bridge. */
+static MTLTextureDescriptor *
+mtl_new_texture_descriptor(const struct kk_image_layout *layout)
+{
+   @autoreleasepool {
+      MTLTextureDescriptor *descriptor = [MTLTextureDescriptor new];
+      descriptor.textureType = (MTLTextureType)layout->type;
+      descriptor.pixelFormat = layout->format.mtl;
+      descriptor.width = layout->width_px;
+      descriptor.height = layout->height_px;
+      descriptor.depth = layout->depth_px;
+      descriptor.mipmapLevelCount = layout->levels;
+      descriptor.sampleCount = layout->sample_count_sa;
+      descriptor.arrayLength = layout->layers;
+      descriptor.allowGPUOptimizedContents = layout->optimized_layout;
+      descriptor.usage = (MTLTextureUsage)layout->usage;
+      /* We don't set the swizzle because Metal complains when the usage has store or render target with swizzle... */
+
+      return descriptor;
+   }
+}
+
+mtl_texture *
+mtl_new_texture_with_descriptor_linear(mtl_buffer *buffer,
+                                       const struct kk_image_layout *layout,
+                                       uint64_t offset)
+{
+   @autoreleasepool {
+      id<MTLBuffer> buf = (id<MTLBuffer>)buffer;
+      MTLTextureDescriptor *descriptor = [mtl_new_texture_descriptor(layout) autorelease];
+      descriptor.resourceOptions = buf.resourceOptions;
+      id<MTLTexture> texture = [buf newTextureWithDescriptor:descriptor offset:offset bytesPerRow:layout->linear_stride_B];
+
+      return texture;
+   }
+}
+
diff --git a/src/kosmickrisp/bridge/mtl_command_buffer.h b/src/kosmickrisp/bridge/mtl_command_buffer.h
new file mode 100644
index 00000000000..6567c0a278e
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_command_buffer.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef MTL_COMMAND_BUFFER_H
+#define MTL_COMMAND_BUFFER_H 1
+
+#include "mtl_types.h"
+
+#include <stdint.h>
+
+void mtl_encode_signal_event(mtl_command_buffer *cmd_buf_handle,
+                             mtl_event *event_handle, uint64_t value);
+
+void mtl_encode_wait_for_event(mtl_command_buffer *cmd_buf_handle,
+                               mtl_event *event_handle, uint64_t value);
+
+void mtl_add_completed_handler(mtl_command_buffer *cmd,
+                               void (*callback)(void *data), void *data);
+
+void mtl_command_buffer_commit(mtl_command_buffer *cmd_buf);
+
+void mtl_present_drawable(mtl_command_buffer *cmd_buf, void *drawable);
+
+#endif /* MTL_COMMAND_BUFFER_H */
diff --git a/src/kosmickrisp/bridge/mtl_command_buffer.m b/src/kosmickrisp/bridge/mtl_command_buffer.m
new file mode 100644
index 00000000000..3086bd293c2
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_command_buffer.m
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_command_buffer.h"
+
+#include <Metal/MTLCommandBuffer.h>
+#include <QuartzCore/CAMetalLayer.h>
+
+void
+mtl_encode_signal_event(mtl_command_buffer *cmd_buf_handle,
+                        mtl_event *event_handle, uint64_t value)
+{
+   @autoreleasepool {
+      id<MTLCommandBuffer> cmd_buf = (id<MTLCommandBuffer>)cmd_buf_handle;
+      id<MTLEvent> event = (id<MTLEvent>)event_handle;
+      [cmd_buf encodeSignalEvent:event value:value];
+   }
+}
+
+void
+mtl_encode_wait_for_event(mtl_command_buffer *cmd_buf_handle,
+                          mtl_event *event_handle, uint64_t value)
+{
+   @autoreleasepool {
+      id<MTLCommandBuffer> cmd_buf = (id<MTLCommandBuffer>)cmd_buf_handle;
+      id<MTLEvent> event = (id<MTLEvent>)event_handle;
+      [cmd_buf encodeWaitForEvent:event value:value];
+   }
+}
+
+void
+mtl_add_completed_handler(mtl_command_buffer *cmd, void (*callback)(void *data),
+                          void *data)
+{
+   @autoreleasepool {
+      id<MTLCommandBuffer> mtl_cmd = (id<MTLCommandBuffer>)cmd;
+      [mtl_cmd addCompletedHandler:^(id<MTLCommandBuffer> _Nonnull cmd_buf) {
+         if (callback)
+            callback(data);
+      }];
+   }
+}
+
+void
+mtl_command_buffer_commit(mtl_command_buffer *cmd_buffer)
+{
+   @autoreleasepool {
+      id<MTLCommandBuffer> cmd_buf = (id<MTLCommandBuffer>)cmd_buffer;
+      [cmd_buf commit];
+   }
+}
+
+void
+mtl_present_drawable(mtl_command_buffer *cmd_buf, void *drawable_ptr)
+{
+   @autoreleasepool {
+      id<MTLCommandBuffer> cmd = (id<MTLCommandBuffer>)cmd_buf;
+      id<CAMetalDrawable> drawable = [(id<CAMetalDrawable>)drawable_ptr autorelease];
+      [cmd presentDrawable:drawable];
+   }
+}
diff --git a/src/kosmickrisp/bridge/mtl_command_queue.h b/src/kosmickrisp/bridge/mtl_command_queue.h
new file mode 100644
index 00000000000..8cc0149d3a6
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_command_queue.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef MTL_COMMAND_QUEUE_H
+#define MTL_COMMAND_QUEUE_H 1
+
+#include "mtl_types.h"
+
+#include <stdint.h>
+
+mtl_command_queue *mtl_new_command_queue(mtl_device *device,
+                                         uint32_t cmd_buffer_count);
+
+mtl_command_buffer *mtl_new_command_buffer(mtl_command_queue *cmd_queue);
+
+#endif /* MTL_COMMAND_QUEUE_H */
\ No newline at end of file
diff --git a/src/kosmickrisp/bridge/mtl_command_queue.m b/src/kosmickrisp/bridge/mtl_command_queue.m
new file mode 100644
index 00000000000..7f36cc69d9c
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_command_queue.m
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_command_queue.h"
+
+#include <Metal/MTLDevice.h>
+#include <Metal/MTLCommandQueue.h>
+
+mtl_command_queue *
+mtl_new_command_queue(mtl_device *device, uint32_t cmd_buffer_count)
+{
+   @autoreleasepool {
+      id<MTLDevice> dev = (id<MTLDevice>)device;
+      return [dev newCommandQueueWithMaxCommandBufferCount:cmd_buffer_count];
+   }
+}
+
+mtl_command_buffer *
+mtl_new_command_buffer(mtl_command_queue *cmd_queue)
+{
+   @autoreleasepool {
+      id<MTLCommandQueue> queue = (id<MTLCommandQueue>)cmd_queue;
+      return [[queue commandBuffer] retain];
+   }
+}
diff --git a/src/kosmickrisp/bridge/mtl_compute_state.h b/src/kosmickrisp/bridge/mtl_compute_state.h
new file mode 100644
index 00000000000..f47a9e7e9c5
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_compute_state.h
@@ -0,0 +1,13 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_types.h"
+
+#include <stdint.h>
+
+mtl_compute_pipeline_state *
+mtl_new_compute_pipeline_state(mtl_device *device, mtl_function *function,
+                               uint64_t max_total_threads_per_threadgroup);
\ No newline at end of file
diff --git a/src/kosmickrisp/bridge/mtl_compute_state.m b/src/kosmickrisp/bridge/mtl_compute_state.m
new file mode 100644
index 00000000000..f6ebce751b5
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_compute_state.m
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "mtl_compute_state.h"
+
+#include <Metal/MTLComputePipeline.h>
+
+mtl_compute_pipeline_state *
+mtl_new_compute_pipeline_state(mtl_device *device, mtl_function *function,
+                               uint64_t max_total_threads_per_threadgroup)
+{
+   @autoreleasepool {
+      id<MTLDevice> dev = (id<MTLDevice>)device;
+      id<MTLComputePipelineState> pipeline = NULL;
+
+      MTLComputePipelineDescriptor *comp_desc = [[[MTLComputePipelineDescriptor alloc] init] autorelease];
+      NSError *error;
+      comp_desc.computeFunction = (id<MTLFunction>)function;
+      comp_desc.maxTotalThreadsPerThreadgroup = max_total_threads_per_threadgroup;
+      pipeline = [dev newComputePipelineStateWithDescriptor:comp_desc options:0 reflection:nil error:&error];
+
+      /* TODO_KOSMICKRISP Error checking */
+
+      return pipeline;
+   }
+}
diff --git a/src/kosmickrisp/bridge/mtl_device.h b/src/kosmickrisp/bridge/mtl_device.h
new file mode 100644
index 00000000000..30ee1b0967e
--- /dev/null
+++ b/src/kosmickrisp/bridge/mtl_device.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef MTL_DEVICE_H
+#define MTL_DEVICE_H 1
+
+#include "mtl_types.h"
+
+#include <stdint.h>
+
+/* TODO_KOSMICKRISP Remove */
+struct kk_image_layout;
+
+/* Device creation */
+mtl_device *mtl_device_create(void);
+
+/* Device operations */
+void mtl_start_gpu_capture(mtl_device *mtl_dev_handle);
+void mtl_stop_gpu_capture(void);
+
+/* Device feature query */
+void mtl_device_get_name(mtl_device *dev, char buffer[256]);
+void mtl_device_get_architecture_name(mtl_device *dev, char buffer[256]);
+uint64_t mtl_device_get_peer_group_id(mtl_device *dev);
+uint32_t mtl_device_get_peer_index(mtl_device *dev);
+uint64_t mtl_device_get_registry_id(mtl_device *dev);
+struct mtl_size mtl_device_max_threads_per_threadgroup(mtl_device *dev);
+
+/* Resource queries */
+void mtl_heap_buffer_size_and_align_with_length(mtl_device *device,
+                                                uint64_t *size_B,
+                                                uint64_t *align_B);
+void
+mtl_heap_texture_size_and_align_with_descriptor(mtl_device *device,
+                                                struct kk_image_layout *layout);
+
+#endif /* MTL_DEVICE_H */
\ No newline at end of file diff --git a/src/kosmickrisp/bridge/mtl_device.m b/src/kosmickrisp/bridge/mtl_device.m new file mode 100644 index 00000000000..aab0e735ff6 --- /dev/null +++ b/src/kosmickrisp/bridge/mtl_device.m @@ -0,0 +1,197 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_device.h" + +/* TODO_KOSMICKRISP Remove */ +#include "kk_image_layout.h" +#include "kk_private.h" + +#include <Metal/MTLDevice.h> +#include <Metal/MTLCaptureManager.h> + +/* Device creation */ +mtl_device * +mtl_device_create() +{ + mtl_device *device = 0u; + + @autoreleasepool { + NSArray<id<MTLDevice>> *devs = MTLCopyAllDevices(); + uint32_t device_count = [devs count]; + + for (uint32_t i = 0u; i < device_count; ++i) { + if (@available(macOS 10.15, *)) { + if (!device && [devs[i] supportsFamily:MTLGPUFamilyMetal3]) { + device = (mtl_device *)[devs[i] retain]; + } + [devs[i] autorelease]; + } + } + + return device; + } +} + +/* Device operations */ +void +mtl_start_gpu_capture(mtl_device *mtl_dev_handle) +{ + @autoreleasepool { + id<MTLDevice> mtl_dev = (id<MTLDevice>)mtl_dev_handle; + MTLCaptureManager *captureMgr = [MTLCaptureManager sharedCaptureManager]; + + // Before macOS 10.15 and iOS 13.0, captureDesc will just be nil + MTLCaptureDescriptor *captureDesc = [[MTLCaptureDescriptor new] autorelease]; + captureDesc.captureObject = mtl_dev; + captureDesc.destination = MTLCaptureDestinationDeveloperTools; + + // TODO_KOSMICKRISP Support dumping a trace to a file? + // NSString *tmp_dir = NSTemporaryDirectory(); + // NSString *pname = [[NSProcessInfo processInfo] processName]; + // NSString *capture_path = [NSString stringWithFormat:@"%@/%@.gputrace", tmp_dir, pname]; + // if ([captureMgr supportsDestination: MTLCaptureDestinationGPUTraceDocument] ) { + // captureDesc.destination = MTLCaptureDestinationGPUTraceDocument; + // captureDesc.outputURL = [NSURL fileURLWithPath: capture_path]; + //} + + NSError *err = nil; + if (![captureMgr startCaptureWithDescriptor:captureDesc error:&err]) { + // fprintf(stderr, "Failed to automatically start GPU capture session (Error code %li) using startCaptureWithDescriptor: %s\n", + // (long)err.code, err.localizedDescription.UTF8String); + // fprintf(stderr, "Using startCaptureWithDevice\n"); + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-declarations" + [captureMgr startCaptureWithDevice:mtl_dev]; +#pragma clang diagnostic pop + } + + //[tmp_dir release]; + //[pname release]; + //[capture_path release]; + } +} + +void +mtl_stop_gpu_capture() +{ + @autoreleasepool { + [[MTLCaptureManager sharedCaptureManager] stopCapture]; + } +} + +/* Device feature query */ +void +mtl_device_get_name(mtl_device *dev, char buffer[256]) +{ + @autoreleasepool { + id<MTLDevice> device = (id<MTLDevice>)dev; + [device.name getCString:buffer maxLength:(sizeof(char) * 256) encoding:NSUTF8StringEncoding]; + } +} + +void +mtl_device_get_architecture_name(mtl_device *dev, char buffer[256]) +{ + @autoreleasepool { + id<MTLDevice> device = (id<MTLDevice>)dev; + [device.architecture.name getCString:buffer maxLength:(sizeof(char) * 256) encoding:NSUTF8StringEncoding]; + } +} + +uint64_t +mtl_device_get_peer_group_id(mtl_device *dev) +{ + @autoreleasepool { + id<MTLDevice> device = (id<MTLDevice>)dev; + return device.peerGroupID; + } +} + +uint32_t +mtl_device_get_peer_index(mtl_device *dev) +{ + @autoreleasepool { + id<MTLDevice> device = (id<MTLDevice>)dev; + return device.peerIndex; + } +} + 
+uint64_t +mtl_device_get_registry_id(mtl_device *dev) +{ + @autoreleasepool { + id<MTLDevice> device = (id<MTLDevice>)dev; + return device.registryID; + } +} + +struct mtl_size +mtl_device_max_threads_per_threadgroup(mtl_device *dev) +{ + @autoreleasepool { + id<MTLDevice> device = (id<MTLDevice>)dev; + return (struct mtl_size){.x = device.maxThreadsPerThreadgroup.width, + .y = device.maxThreadsPerThreadgroup.height, + .z = device.maxThreadsPerThreadgroup.depth}; + } +} + +/* Resource queries */ +/* TODO_KOSMICKRISP Return a struct */ +void +mtl_heap_buffer_size_and_align_with_length(mtl_device *device, uint64_t *size_B, + uint64_t *align_B) +{ + @autoreleasepool { + id<MTLDevice> dev = (id<MTLDevice>)device; + MTLSizeAndAlign size_align = [dev heapBufferSizeAndAlignWithLength:*size_B options:KK_MTL_RESOURCE_OPTIONS]; + *size_B = size_align.size; + *align_B = size_align.align; + } +} + +/* TODO_KOSMICKRISP Remove */ +static MTLTextureDescriptor * +mtl_new_texture_descriptor(const struct kk_image_layout *layout) +{ + @autoreleasepool { + MTLTextureDescriptor *descriptor = [MTLTextureDescriptor new]; + descriptor.textureType = (MTLTextureType)layout->type; + descriptor.pixelFormat = layout->format.mtl; + descriptor.width = layout->width_px; + descriptor.height = layout->height_px; + descriptor.depth = layout->depth_px; + descriptor.mipmapLevelCount = layout->levels; + descriptor.sampleCount = layout->sample_count_sa; + descriptor.arrayLength = layout->layers; + descriptor.allowGPUOptimizedContents = layout->optimized_layout; + descriptor.usage = (MTLTextureUsage)layout->usage; + /* We don't set the swizzle because Metal complains when the usage has store or render target with swizzle... */ + + return descriptor; + } +} + +void +mtl_heap_texture_size_and_align_with_descriptor(mtl_device *device, + struct kk_image_layout *layout) +{ + @autoreleasepool { + id<MTLDevice> dev = (id<MTLDevice>)device; + if (layout->optimized_layout) { + MTLTextureDescriptor *descriptor = [mtl_new_texture_descriptor(layout) autorelease]; + descriptor.resourceOptions = KK_MTL_RESOURCE_OPTIONS; + MTLSizeAndAlign size_align = [dev heapTextureSizeAndAlignWithDescriptor:descriptor]; + layout->size_B = size_align.size; + layout->align_B = size_align.align; + } else { + /* Linear textures have different alignment since they are allocated on top of MTLBuffers */ + layout->align_B = [dev minimumLinearTextureAlignmentForPixelFormat:layout->format.mtl]; + } + } +} diff --git a/src/kosmickrisp/bridge/mtl_encoder.h b/src/kosmickrisp/bridge/mtl_encoder.h new file mode 100644 index 00000000000..bfd582d571e --- /dev/null +++ b/src/kosmickrisp/bridge/mtl_encoder.h @@ -0,0 +1,152 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef MTL_ENCODER_H +#define MTL_ENCODER_H 1 + +#include "mtl_types.h" + +#include <stdint.h> + +/* Common encoder utils */ +void mtl_end_encoding(void *encoder); + +/* MTLBlitEncoder */ +mtl_blit_encoder *mtl_new_blit_command_encoder(mtl_command_buffer *cmd_buffer); + +void mtl_blit_update_fence(mtl_blit_encoder *encoder, mtl_fence *fence); +void mtl_blit_wait_for_fence(mtl_blit_encoder *encoder, mtl_fence *fence); + +void mtl_copy_from_buffer_to_buffer(mtl_blit_encoder *blit_enc_handle, + mtl_buffer *src_buf, size_t src_offset, + mtl_buffer *dst_buf, size_t dst_offset, + size_t size); + +void mtl_copy_from_buffer_to_texture(mtl_blit_encoder *blit_enc_handle, + struct mtl_buffer_image_copy *data); + +void mtl_copy_from_texture_to_buffer(mtl_blit_encoder *blit_enc_handle, + struct mtl_buffer_image_copy *data); + +void mtl_copy_from_texture_to_texture( + mtl_blit_encoder *blit_enc_handle, mtl_texture *src_tex_handle, + size_t src_slice, size_t src_level, struct mtl_origin src_origin, + struct mtl_size src_size, mtl_texture *dst_tex_handle, size_t dst_slice, + size_t dst_level, struct mtl_origin dst_origin); + +/* MTLComputeEncoder */ +mtl_compute_encoder * +mtl_new_compute_command_encoder(mtl_command_buffer *cmd_buffer); + +void mtl_compute_update_fence(mtl_compute_encoder *encoder, mtl_fence *fence); +void mtl_compute_wait_for_fence(mtl_compute_encoder *encoder, mtl_fence *fence); + +void mtl_compute_set_pipeline_state(mtl_compute_encoder *encoder, + mtl_compute_pipeline_state *state_handle); + +void mtl_compute_set_buffer(mtl_compute_encoder *encoder, mtl_buffer *buffer, + size_t offset, size_t index); + +void mtl_compute_use_resource(mtl_compute_encoder *encoder, + mtl_resource *res_handle, uint32_t usage); + +void mtl_compute_use_resources(mtl_compute_encoder *encoder, + mtl_resource **resource_handles, uint32_t count, + enum mtl_resource_usage usage); + +void mtl_compute_use_heaps(mtl_compute_encoder *encoder, mtl_heap **heaps, + uint32_t count); + +void mtl_dispatch_threads(mtl_compute_encoder *encoder, + struct mtl_size grid_size, + struct mtl_size local_size); + +void mtl_dispatch_threadgroups_with_indirect_buffer( + mtl_compute_encoder *encoder, mtl_buffer *buffer, uint32_t offset, + struct mtl_size local_size); + +/* MTLRenderEncoder */ +mtl_render_encoder *mtl_new_render_command_encoder_with_descriptor( + mtl_command_buffer *command_buffer, mtl_render_pass_descriptor *descriptor); + +void mtl_render_update_fence(mtl_render_encoder *encoder, mtl_fence *fence); +void mtl_render_wait_for_fence(mtl_render_encoder *encoder, mtl_fence *fence); + +void mtl_set_viewports(mtl_render_encoder *encoder, + struct mtl_viewport *viewports, uint32_t count); + +void mtl_set_scissor_rects(mtl_render_encoder *encoder, + struct mtl_scissor_rect *scissor_rects, + uint32_t count); + +void mtl_render_set_pipeline_state(mtl_render_encoder *encoder, + mtl_render_pipeline_state *pipeline); + +void mtl_set_depth_stencil_state(mtl_render_encoder *encoder, + mtl_depth_stencil_state *state); + +void mtl_set_stencil_references(mtl_render_encoder *encoder, uint32_t front, + uint32_t back); + +void mtl_set_front_face_winding(mtl_render_encoder *encoder, + enum mtl_winding winding); + +void mtl_set_cull_mode(mtl_render_encoder *encoder, enum mtl_cull_mode mode); + +void mtl_set_visibility_result_mode(mtl_render_encoder *encoder, + enum mtl_visibility_result_mode mode, + size_t offset); + +void mtl_set_depth_bias(mtl_render_encoder *encoder, 
float depth_bias, + float slope_scale, float clamp); + +void mtl_set_depth_clip_mode(mtl_render_encoder *encoder, + enum mtl_depth_clip_mode mode); + +void mtl_set_vertex_amplification_count(mtl_render_encoder *encoder, + uint32_t *layer_ids, uint32_t id_count); + +void mtl_set_vertex_buffer(mtl_render_encoder *encoder, mtl_buffer *buffer, + uint32_t offset, uint32_t index); + +void mtl_set_fragment_buffer(mtl_render_encoder *encoder, mtl_buffer *buffer, + uint32_t offset, uint32_t index); + +void mtl_draw_primitives(mtl_render_encoder *encoder, + enum mtl_primitive_type primitve_type, + uint32_t vertexStart, uint32_t vertexCount, + uint32_t instanceCount, uint32_t baseInstance); + +void mtl_draw_indexed_primitives( + mtl_render_encoder *encoder, enum mtl_primitive_type primitve_type, + uint32_t index_count, enum mtl_index_type index_type, + mtl_buffer *index_buffer, uint32_t index_buffer_offset, + uint32_t instance_count, int32_t base_vertex, uint32_t base_instance); + +void mtl_draw_primitives_indirect(mtl_render_encoder *encoder, + enum mtl_primitive_type primitve_type, + mtl_buffer *indirect_buffer, + uint64_t indirect_buffer_offset); + +void mtl_draw_indexed_primitives_indirect(mtl_render_encoder *encoder, + enum mtl_primitive_type primitve_type, + enum mtl_index_type index_type, + mtl_buffer *index_buffer, + uint32_t index_buffer_offset, + mtl_buffer *indirect_buffer, + uint64_t indirect_buffer_offset); + +void mtl_render_use_resource(mtl_compute_encoder *encoder, + mtl_resource *res_handle, uint32_t usage); + +void mtl_render_use_resources(mtl_render_encoder *encoder, + mtl_resource **resource_handles, uint32_t count, + enum mtl_resource_usage usage); + +void mtl_render_use_heaps(mtl_render_encoder *encoder, mtl_heap **heaps, + uint32_t count); + +#endif /* MTL_ENCODER_H */ diff --git a/src/kosmickrisp/bridge/mtl_encoder.m b/src/kosmickrisp/bridge/mtl_encoder.m new file mode 100644 index 00000000000..8385ea8ac0b --- /dev/null +++ b/src/kosmickrisp/bridge/mtl_encoder.m @@ -0,0 +1,537 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_encoder.h" + +#include <Metal/MTLBlitCommandEncoder.h> +#include <Metal/MTLComputeCommandEncoder.h> +#include <Metal/MTLRenderCommandEncoder.h> + +/* Common encoder utils */ +void +mtl_end_encoding(void *encoder) +{ + @autoreleasepool { + id<MTLCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder; + [enc endEncoding]; + } +} + +/* MTLBlitEncoder */ +mtl_blit_encoder * +mtl_new_blit_command_encoder(mtl_command_buffer *cmd_buffer) +{ + @autoreleasepool { + id<MTLCommandBuffer> cmd_buf = (id<MTLCommandBuffer>)cmd_buffer; + return [[cmd_buf blitCommandEncoder] retain]; + } +} + +void +mtl_blit_update_fence(mtl_blit_encoder *encoder, + mtl_fence *fence) +{ + @autoreleasepool { + id<MTLBlitCommandEncoder> enc = (id<MTLBlitCommandEncoder>)encoder; + id<MTLFence> f = (id<MTLFence>)fence; + [enc updateFence:f]; + } +} + +void +mtl_blit_wait_for_fence(mtl_blit_encoder *encoder, + mtl_fence *fence) +{ + @autoreleasepool { + id<MTLBlitCommandEncoder> enc = (id<MTLBlitCommandEncoder>)encoder; + id<MTLFence> f = (id<MTLFence>)fence; + [enc waitForFence:f]; + } +} + +void +mtl_copy_from_buffer_to_buffer(mtl_blit_encoder *blit_enc_handle, + mtl_buffer *src_buf, size_t src_offset, + mtl_buffer *dst_buf, size_t dst_offset, + size_t size) +{ + @autoreleasepool { + id<MTLBlitCommandEncoder> blit = (id<MTLBlitCommandEncoder>)blit_enc_handle; + id<MTLBuffer> mtl_src_buffer = (id<MTLBuffer>)src_buf; + id<MTLBuffer> mtl_dst_buffer = (id<MTLBuffer>)dst_buf; + [blit copyFromBuffer:mtl_src_buffer sourceOffset:src_offset toBuffer:mtl_dst_buffer destinationOffset:dst_offset size:size]; + } +} + +void +mtl_copy_from_buffer_to_texture(mtl_blit_encoder *blit_enc_handle, + struct mtl_buffer_image_copy *data) +{ + @autoreleasepool { + const MTLSize size = MTLSizeMake(data->image_size.x, data->image_size.y, data->image_size.z); + const MTLOrigin origin = MTLOriginMake(data->image_origin.x, data->image_origin.y, data->image_origin.z); + id<MTLBlitCommandEncoder> blit = (id<MTLBlitCommandEncoder>)blit_enc_handle; + id<MTLBuffer> buffer = (id<MTLBuffer>)data->buffer; + id<MTLTexture> image = (id<MTLTexture>)data->image; + [blit copyFromBuffer:buffer + sourceOffset:data->buffer_offset_B + sourceBytesPerRow:data->buffer_stride_B + sourceBytesPerImage:data->buffer_2d_image_size_B + sourceSize:size + toTexture:image + destinationSlice:data->image_slice + destinationLevel:data->image_level + destinationOrigin:origin + options:(MTLBlitOption)data->options]; + } +} + +void +mtl_copy_from_texture_to_buffer(mtl_blit_encoder *blit_enc_handle, + struct mtl_buffer_image_copy *data) +{ + @autoreleasepool { + const MTLSize size = MTLSizeMake(data->image_size.x, data->image_size.y, data->image_size.z); + const MTLOrigin origin = MTLOriginMake(data->image_origin.x, data->image_origin.y, data->image_origin.z); + id<MTLBlitCommandEncoder> blit = (id<MTLBlitCommandEncoder>)blit_enc_handle; + id<MTLBuffer> buffer = (id<MTLBuffer>)data->buffer; + id<MTLTexture> image = (id<MTLTexture>)data->image; + [blit copyFromTexture:image + sourceSlice:data->image_slice + sourceLevel:data->image_level + sourceOrigin:origin + sourceSize:size + toBuffer:buffer + destinationOffset:data->buffer_offset_B + destinationBytesPerRow:data->buffer_stride_B + destinationBytesPerImage:data->buffer_2d_image_size_B + options:(MTLBlitOption)data->options]; + } +} + +void +mtl_copy_from_texture_to_texture(mtl_blit_encoder *blit_enc_handle, + mtl_texture *src_tex_handle, size_t 
src_slice, + size_t src_level, struct mtl_origin src_origin, + struct mtl_size src_size, + mtl_texture *dst_tex_handle, size_t dst_slice, + size_t dst_level, struct mtl_origin dst_origin) +{ + @autoreleasepool { + MTLOrigin mtl_src_origin = MTLOriginMake(src_origin.x, src_origin.y, src_origin.z); + MTLSize mtl_src_size = MTLSizeMake(src_size.x, src_size.y, src_size.z); + MTLOrigin mtl_dst_origin = MTLOriginMake(dst_origin.x, dst_origin.y, dst_origin.z); + id<MTLTexture> mtl_dst_tex = (id<MTLTexture>)dst_tex_handle; + id<MTLBlitCommandEncoder> blit = (id<MTLBlitCommandEncoder>)blit_enc_handle; + id<MTLTexture> mtl_src_tex = (id<MTLTexture>)src_tex_handle; + [blit copyFromTexture:mtl_src_tex + sourceSlice:src_slice + sourceLevel:src_level + sourceOrigin:mtl_src_origin + sourceSize:mtl_src_size + toTexture:mtl_dst_tex + destinationSlice:dst_slice + destinationLevel:dst_level + destinationOrigin:mtl_dst_origin]; + } +} + +/* MTLComputeEncoder */ +mtl_compute_encoder * +mtl_new_compute_command_encoder(mtl_command_buffer *cmd_buffer) +{ + @autoreleasepool { + id<MTLCommandBuffer> cmd_buf = (id<MTLCommandBuffer>)cmd_buffer; + return [[cmd_buf computeCommandEncoder] retain]; + } +} + +void +mtl_compute_update_fence(mtl_compute_encoder *encoder, mtl_fence *fence) +{ + @autoreleasepool { + id<MTLComputeCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder; + id<MTLFence> f = (id<MTLFence>)fence; + [enc updateFence:f]; + } +} + +void +mtl_compute_wait_for_fence(mtl_compute_encoder *encoder, mtl_fence *fence) +{ + @autoreleasepool { + id<MTLComputeCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder; + id<MTLFence> f = (id<MTLFence>)fence; + [enc waitForFence:f]; + } +} + +void +mtl_compute_set_pipeline_state(mtl_compute_encoder *encoder, + mtl_compute_pipeline_state *state_handle) +{ + @autoreleasepool { + id<MTLComputeCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder; + id<MTLComputePipelineState> state = (id<MTLComputePipelineState>)state_handle; + [enc setComputePipelineState:state]; + } +} + +void +mtl_compute_set_buffer(mtl_compute_encoder *encoder, + mtl_buffer *buffer, size_t offset, size_t index) +{ + @autoreleasepool { + id<MTLComputeCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder; + id<MTLBuffer> buf = (id<MTLBuffer>)buffer; + [enc setBuffer:buf offset:offset atIndex:index]; + } +} + +void +mtl_compute_use_resource(mtl_compute_encoder *encoder, + mtl_resource *res_handle, uint32_t usage) +{ + @autoreleasepool { + id<MTLComputeCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder; + id<MTLResource> res = (id<MTLResource>)res_handle; + [enc useResource:res usage:(MTLResourceUsage)usage]; + } +} + +void +mtl_compute_use_resources(mtl_compute_encoder *encoder, + mtl_resource **resource_handles, uint32_t count, + enum mtl_resource_usage usage) +{ + @autoreleasepool { + id<MTLComputeCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder; + id<MTLResource> *handles = (id<MTLResource>*)resource_handles; + [enc useResources:handles count:count usage:(MTLResourceUsage)usage]; + } +} + +void +mtl_compute_use_heaps(mtl_compute_encoder *encoder, mtl_heap **heaps, + uint32_t count) +{ + @autoreleasepool { + id<MTLComputeCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder; + id<MTLHeap> *handles = (id<MTLHeap>*)heaps; + [enc useHeaps:handles count:count]; + } +} + +void +mtl_dispatch_threads(mtl_compute_encoder *encoder, + struct mtl_size grid_size, struct mtl_size local_size) +{ + @autoreleasepool { + id<MTLComputeCommandEncoder> enc = 
(id<MTLComputeCommandEncoder>)encoder; + MTLSize thread_count = MTLSizeMake(grid_size.x * local_size.x, + grid_size.y * local_size.y, + grid_size.z * local_size.z); + MTLSize threads_per_threadgroup = MTLSizeMake(local_size.x, + local_size.y, + local_size.z); + + // TODO_KOSMICKRISP can we rely on nonuniform threadgroup size support? + [enc dispatchThreads:thread_count threadsPerThreadgroup:threads_per_threadgroup]; + } +} + +void +mtl_dispatch_threadgroups_with_indirect_buffer(mtl_compute_encoder *encoder, + mtl_buffer *buffer, + uint32_t offset, + struct mtl_size local_size) +{ + @autoreleasepool { + id<MTLComputeCommandEncoder> enc = (id<MTLComputeCommandEncoder>)encoder; + id<MTLBuffer> buf = (id<MTLBuffer>)buffer; + MTLSize threads_per_threadgroup = MTLSizeMake(local_size.x, + local_size.y, + local_size.z); + + [enc dispatchThreadgroupsWithIndirectBuffer:buf indirectBufferOffset:offset threadsPerThreadgroup:threads_per_threadgroup]; + } +} + +/* MTLRenderEncoder */ + +/* Encoder commands */ +mtl_render_encoder * +mtl_new_render_command_encoder_with_descriptor( + mtl_command_buffer *command_buffer, mtl_render_pass_descriptor *descriptor) +{ + @autoreleasepool { + id<MTLCommandBuffer> cmd = (id<MTLCommandBuffer>)command_buffer; + MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor; + return [[cmd renderCommandEncoderWithDescriptor:desc] retain]; + } +} + +void +mtl_render_update_fence(mtl_render_encoder *encoder, mtl_fence *fence) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + id<MTLFence> f = (id<MTLFence>)fence; + [enc updateFence:f afterStages:MTLRenderStageFragment]; + } +} + +void +mtl_render_wait_for_fence(mtl_render_encoder *encoder, mtl_fence *fence) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + id<MTLFence> f = (id<MTLFence>)fence; + [enc waitForFence:f beforeStages:MTLRenderStageVertex]; + } +} + +void +mtl_set_viewports(mtl_render_encoder *encoder, struct mtl_viewport *viewports, + uint32_t count) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + MTLViewport *vps = (MTLViewport *)viewports; + [enc setViewports:vps count:count]; + } +} + +void +mtl_set_scissor_rects(mtl_render_encoder *encoder, + struct mtl_scissor_rect *scissor_rects, uint32_t count) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + MTLScissorRect *rects = (MTLScissorRect *)scissor_rects; + [enc setScissorRects:rects count:count]; + } +} + +void +mtl_render_set_pipeline_state(mtl_render_encoder *encoder, + mtl_render_pipeline_state *pipeline) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + id<MTLRenderPipelineState> pipe = (id<MTLRenderPipelineState>)pipeline; + [enc setRenderPipelineState:pipe]; + } +} + +void +mtl_set_depth_stencil_state(mtl_render_encoder *encoder, + mtl_depth_stencil_state *state) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + id<MTLDepthStencilState> s = (id<MTLDepthStencilState>)state; + [enc setDepthStencilState:s]; + } +} + +void +mtl_set_stencil_references(mtl_render_encoder *encoder, uint32_t front, + uint32_t back) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + [enc setStencilFrontReferenceValue:front backReferenceValue:back]; + } +} + +void +mtl_set_front_face_winding(mtl_render_encoder *encoder, + enum 
mtl_winding winding) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + [enc setFrontFacingWinding:(MTLWinding)winding]; + } +} + +void +mtl_set_cull_mode(mtl_render_encoder *encoder, enum mtl_cull_mode mode) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + [enc setCullMode:(MTLCullMode)mode]; + } +} + +void +mtl_set_visibility_result_mode(mtl_render_encoder *encoder, + enum mtl_visibility_result_mode mode, + size_t offset) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + [enc setVisibilityResultMode:(MTLVisibilityResultMode)mode offset:offset]; + } +} + +void +mtl_set_depth_bias(mtl_render_encoder *encoder, float depth_bias, + float slope_scale, float clamp) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + [enc setDepthBias:depth_bias slopeScale:slope_scale clamp:clamp]; + } +} + +void +mtl_set_depth_clip_mode(mtl_render_encoder *encoder, + enum mtl_depth_clip_mode mode) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + [enc setDepthClipMode:(MTLDepthClipMode)mode]; + } +} + +void +mtl_set_vertex_amplification_count(mtl_render_encoder *encoder, + uint32_t *layer_ids, uint32_t id_count) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + MTLVertexAmplificationViewMapping mappings[32]; + for (uint32_t i = 0u; i < id_count; ++i) { + mappings[i].renderTargetArrayIndexOffset = layer_ids[i]; + mappings[i].viewportArrayIndexOffset = 0u; + } + [enc setVertexAmplificationCount:id_count viewMappings:mappings]; + } +} + +void +mtl_set_vertex_buffer(mtl_render_encoder *encoder, mtl_buffer *buffer, + uint32_t offset, uint32_t index) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + id<MTLBuffer> buf = (id<MTLBuffer>)buffer; + [enc setVertexBuffer:buf offset:offset atIndex:index]; + } +} + +void +mtl_set_fragment_buffer(mtl_render_encoder *encoder, mtl_buffer *buffer, + uint32_t offset, uint32_t index) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + id<MTLBuffer> buf = (id<MTLBuffer>)buffer; + [enc setFragmentBuffer:buf offset:offset atIndex:index]; + } +} + +void +mtl_draw_primitives(mtl_render_encoder *encoder, + enum mtl_primitive_type primitve_type, uint32_t vertexStart, + uint32_t vertexCount, uint32_t instanceCount, + uint32_t baseInstance) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + MTLPrimitiveType type = (MTLPrimitiveType)primitve_type; + [enc drawPrimitives:type vertexStart:vertexStart vertexCount:vertexCount instanceCount:instanceCount baseInstance:baseInstance]; + } +} + +void +mtl_draw_indexed_primitives( + mtl_render_encoder *encoder, enum mtl_primitive_type primitve_type, + uint32_t index_count, enum mtl_index_type index_type, + mtl_buffer *index_buffer, uint32_t index_buffer_offset, + uint32_t instance_count, int32_t base_vertex, uint32_t base_instance) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + id<MTLBuffer> buf = (id<MTLBuffer>)index_buffer; + MTLIndexType ndx_type = (MTLIndexType)index_type; + MTLPrimitiveType primitive = (MTLPrimitiveType)primitve_type; + [enc drawIndexedPrimitives:primitive indexCount:index_count indexType:ndx_type indexBuffer:buf 
indexBufferOffset:index_buffer_offset instanceCount:instance_count baseVertex:base_vertex baseInstance:base_instance]; + } +} + +void +mtl_draw_primitives_indirect(mtl_render_encoder *encoder, + enum mtl_primitive_type primitve_type, + mtl_buffer *indirect_buffer, + uint64_t indirect_buffer_offset) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + id<MTLBuffer> buf = (id<MTLBuffer>)indirect_buffer; + MTLPrimitiveType type = (MTLPrimitiveType)primitve_type; + [enc drawPrimitives:type indirectBuffer:buf indirectBufferOffset:indirect_buffer_offset]; + } +} + +void +mtl_draw_indexed_primitives_indirect(mtl_render_encoder *encoder, + enum mtl_primitive_type primitve_type, + enum mtl_index_type index_type, + mtl_buffer *index_buffer, + uint32_t index_buffer_offset, + mtl_buffer *indirect_buffer, + uint64_t indirect_buffer_offset) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + id<MTLBuffer> buf = (id<MTLBuffer>)indirect_buffer; + id<MTLBuffer> ndx_buf = (id<MTLBuffer>)index_buffer; + MTLPrimitiveType type = (MTLPrimitiveType)primitve_type; + MTLIndexType ndx_type = (MTLIndexType)index_type; + [enc drawIndexedPrimitives:type indexType:ndx_type indexBuffer:ndx_buf indexBufferOffset:index_buffer_offset indirectBuffer:buf indirectBufferOffset:indirect_buffer_offset]; + } +} + +void +mtl_render_use_resource(mtl_compute_encoder *encoder, mtl_resource *res_handle, + uint32_t usage) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + id<MTLResource> res = (id<MTLResource>)res_handle; + [enc useResource:res usage:(MTLResourceUsage)usage stages:MTLRenderStageVertex|MTLRenderStageFragment]; + } +} + +void +mtl_render_use_resources(mtl_render_encoder *encoder, + mtl_resource **resource_handles, uint32_t count, + enum mtl_resource_usage usage) +{ + @autoreleasepool { + // id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + id<MTLResource> *handles = (id<MTLResource>*)resource_handles; + for (uint32_t i = 0u; i < count; ++i) { + if (handles[i] != NULL) + mtl_render_use_resource(encoder, handles[i], usage); + } + /* TODO_KOSMICKRISP No null values in the array or Metal complains */ + // [enc useResources:handles count:count usage:(MTLResourceUsage)usage]; + } +} + +void +mtl_render_use_heaps(mtl_render_encoder *encoder, mtl_heap **heaps, + uint32_t count) +{ + @autoreleasepool { + id<MTLRenderCommandEncoder> enc = (id<MTLRenderCommandEncoder>)encoder; + id<MTLHeap> *handles = (id<MTLHeap>*)heaps; + [enc useHeaps:handles count:count stages:MTLRenderStageVertex|MTLRenderStageFragment]; + } +} diff --git a/src/kosmickrisp/bridge/mtl_format.h b/src/kosmickrisp/bridge/mtl_format.h new file mode 100644 index 00000000000..5a2577c5a28 --- /dev/null +++ b/src/kosmickrisp/bridge/mtl_format.h @@ -0,0 +1,205 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ +#ifndef MTL_FORMAT_H +#define MTL_FORMAT_H 1 + +/* TODO_KOSMICKRISP Haven't modified all + * Slightly modified naming so they match to enum pipe_format for convenience + */ +enum mtl_pixel_format { + MTL_PIXEL_FORMAT_INVALID = 0, + + /* Normal 8 bit formats */ + MTL_PIXEL_FORMAT_A8_UNORM = 1, + + MTL_PIXEL_FORMAT_R8_UNORM = 10, + MTL_PIXEL_FORMAT_R8_SRGB = 11, + MTL_PIXEL_FORMAT_R8_SNORM = 12, + MTL_PIXEL_FORMAT_R8_UINT = 13, + MTL_PIXEL_FORMAT_R8_SINT = 14, + + /* Normal 16 bit formats */ + MTL_PIXEL_FORMAT_R16_UNORM = 20, + MTL_PIXEL_FORMAT_R16_SNORM = 22, + MTL_PIXEL_FORMAT_R16_UINT = 23, + MTL_PIXEL_FORMAT_R16_SINT = 24, + MTL_PIXEL_FORMAT_R16_FLOAT = 25, + + MTL_PIXEL_FORMAT_R8G8_UNORM = 30, + MTL_PIXEL_FORMAT_R8G8_SRGB = 31, + MTL_PIXEL_FORMAT_R8G8_SNORM = 32, + MTL_PIXEL_FORMAT_R8G8_UINT = 33, + MTL_PIXEL_FORMAT_R8G8_SINT = 34, + + /* Packed 16 bit formats */ + MTL_PIXEL_FORMAT_B5G6R5_UNORM = 40, + MTL_PIXEL_FORMAT_A1B5G5R5_UNORM = 41, + MTL_PIXEL_FORMAT_A4B4G4R4_UNORM = 42, + MTL_PIXEL_FORMAT_B5G5R5A1_UNORM = 43, + + /* Normal 32 bit formats */ + MTL_PIXEL_FORMAT_R32_UINT = 53, + MTL_PIXEL_FORMAT_R32_SINT = 54, + MTL_PIXEL_FORMAT_R32_FLOAT = 55, + + MTL_PIXEL_FORMAT_R16G16_UNORM = 60, + MTL_PIXEL_FORMAT_R16G16_SNORM = 62, + MTL_PIXEL_FORMAT_R16G16_UINT = 63, + MTL_PIXEL_FORMAT_R16G16_SINT = 64, + MTL_PIXEL_FORMAT_R16G16_FLOAT = 65, + + MTL_PIXEL_FORMAT_R8G8B8A8_UNORM = 70, + MTL_PIXEL_FORMAT_R8G8B8A8_SRGB = 71, + MTL_PIXEL_FORMAT_R8G8B8A8_SNORM = 72, + MTL_PIXEL_FORMAT_R8G8B8A8_UINT = 73, + MTL_PIXEL_FORMAT_R8G8B8A8_SINT = 74, + + MTL_PIXEL_FORMAT_B8G8R8A8_UNORM = 80, + MTL_PIXEL_FORMAT_B8G8R8A8_SRGB = 81, + + /* Packed 32 bit formats */ + MTL_PIXEL_FORMAT_R10G10B10A2_UNORM = 90, + MTL_PIXEL_FORMAT_R10G10B10A2_UINT = 91, + + MTL_PIXEL_FORMAT_R11G11B10_FLOAT = 92, + MTL_PIXEL_FORMAT_R9G9B9E5_FLOAT = 93, + + MTL_PIXEL_FORMAT_B10G10R10A2_UNORM = 94, + + MTL_PIXEL_FORMAT_BGR10_XR = 554, + MTL_PIXEL_FORMAT_BGR10_XR_SRGB = 555, + + /* Normal 64 bit formats */ + MTL_PIXEL_FORMAT_R32G32_UINT = 103, + MTL_PIXEL_FORMAT_R32G32_SINT = 104, + MTL_PIXEL_FORMAT_R32G32_FLOAT = 105, + + MTL_PIXEL_FORMAT_R16G16B16A16_UNORM = 110, + MTL_PIXEL_FORMAT_R16G16B16A16_SNORM = 112, + MTL_PIXEL_FORMAT_R16G16B16A16_UINT = 113, + MTL_PIXEL_FORMAT_R16G16B16A16_SINT = 114, + MTL_PIXEL_FORMAT_R16G16B16A16_FLOAT = 115, + + MTL_PIXEL_FORMAT_BGRA10_XR = 552, + MTL_PIXEL_FORMAT_BGRA10_XR_SRGB = 553, + + /* Normal 128 bit formats */ + MTL_PIXEL_FORMAT_R32G32B32A32_UINT = 123, + MTL_PIXEL_FORMAT_R32G32B32A32_SINT = 124, + MTL_PIXEL_FORMAT_R32G32B32A32_FLOAT = 125, + + /* Compressed formats. 
*/ + + /* S3TC/DXT */ + MTL_PIXEL_FORMAT_BC1_RGBA = 130, + MTL_PIXEL_FORMAT_BC1_RGBA_SRGB = 131, + MTL_PIXEL_FORMAT_BC2_RGBA = 132, + MTL_PIXEL_FORMAT_BC2_RGBA_SRGB = 133, + MTL_PIXEL_FORMAT_BC3_RGBA = 134, + MTL_PIXEL_FORMAT_BC3_RGBA_SRGB = 135, + + /* RGTC */ + MTL_PIXEL_FORMAT_BC4_R_UNORM = 140, + MTL_PIXEL_FORMAT_BC4_R_SNORM = 141, + MTL_PIXEL_FORMAT_BC5_RG_UNORM = 142, + MTL_PIXEL_FORMAT_BC5_RG_SNORM = 143, + + /* BPTC */ + MTL_PIXEL_FORMAT_BC6H_RGB_FLOAT = 150, + MTL_PIXEL_FORMAT_BC6H_RGBU_FLOAT = 151, + MTL_PIXEL_FORMAT_BC7_RGBA_UNORM = 152, + MTL_PIXEL_FORMAT_BC7_RGBA_SRGB = 153, + + /* PVRTC */ + MTL_PIXEL_FORMAT_PVRTC_RGB_2BPP = 160, + MTL_PIXEL_FORMAT_PVRTC_RGB_2BPP_SRGB = 161, + MTL_PIXEL_FORMAT_PVRTC_RGB_4BPP = 162, + MTL_PIXEL_FORMAT_PVRTC_RGB_4BPP_SRGB = 163, + MTL_PIXEL_FORMAT_PVRTC_RGBA_2BPP = 164, + MTL_PIXEL_FORMAT_PVRTC_RGBA_2BPP_SRGB = 165, + MTL_PIXEL_FORMAT_PVRTC_RGBA_4BPP = 166, + MTL_PIXEL_FORMAT_PVRTC_RGBA_4BPP_SRGB = 167, + + /* ETC2 */ + MTL_PIXEL_FORMAT_ETC2_R11_UNORM = 170, + MTL_PIXEL_FORMAT_ETC2_R11_SNORM = 172, + MTL_PIXEL_FORMAT_ETC2_RG11_UNORM = 174, + MTL_PIXEL_FORMAT_ETC2_RG11_SNORM = 176, + MTL_PIXEL_FORMAT_ETC2_RGBA8 = 178, + MTL_PIXEL_FORMAT_ETC2_SRGBA8 = 179, + + MTL_PIXEL_FORMAT_ETC2_RGB8 = 180, + MTL_PIXEL_FORMAT_ETC2_SRGB8 = 181, + MTL_PIXEL_FORMAT_ETC2_RGB8A1 = 182, + MTL_PIXEL_FORMAT_ETC2_SRGB8A1 = 183, + + /* ASTC */ + MTL_PIXEL_FORMAT_ASTC_4x4_SRGB = 186, + MTL_PIXEL_FORMAT_ASTC_5x4_SRGB = 187, + MTL_PIXEL_FORMAT_ASTC_5x5_SRGB = 188, + MTL_PIXEL_FORMAT_ASTC_6x5_SRGB = 189, + MTL_PIXEL_FORMAT_ASTC_6x6_SRGB = 190, + MTL_PIXEL_FORMAT_ASTC_8x5_SRGB = 192, + MTL_PIXEL_FORMAT_ASTC_8x6_SRGB = 193, + MTL_PIXEL_FORMAT_ASTC_8x8_SRGB = 194, + MTL_PIXEL_FORMAT_ASTC_10x5_SRGB = 195, + MTL_PIXEL_FORMAT_ASTC_10x6_SRGB = 196, + MTL_PIXEL_FORMAT_ASTC_10x8_SRGB = 197, + MTL_PIXEL_FORMAT_ASTC_10x10_SRGB = 198, + MTL_PIXEL_FORMAT_ASTC_12x10_SRGB = 199, + MTL_PIXEL_FORMAT_ASTC_12x12_SRGB = 200, + + MTL_PIXEL_FORMAT_ASTC_4x4 = 204, + MTL_PIXEL_FORMAT_ASTC_5x4 = 205, + MTL_PIXEL_FORMAT_ASTC_5x5 = 206, + MTL_PIXEL_FORMAT_ASTC_6x5 = 207, + MTL_PIXEL_FORMAT_ASTC_6x6 = 208, + MTL_PIXEL_FORMAT_ASTC_8x5 = 210, + MTL_PIXEL_FORMAT_ASTC_8x6 = 211, + MTL_PIXEL_FORMAT_ASTC_8x8 = 212, + MTL_PIXEL_FORMAT_ASTC_10x5 = 213, + MTL_PIXEL_FORMAT_ASTC_10x6 = 214, + MTL_PIXEL_FORMAT_ASTC_10x8 = 215, + MTL_PIXEL_FORMAT_ASTC_10x10 = 216, + MTL_PIXEL_FORMAT_ASTC_12x10 = 217, + MTL_PIXEL_FORMAT_ASTC_12x12 = 218, + + /* ASTC HDR (High Dynamic Range) */ + MTL_PIXEL_FORMAT_ASTC_4x4_HDR = 222, + MTL_PIXEL_FORMAT_ASTC_5x4_HDR = 223, + MTL_PIXEL_FORMAT_ASTC_5x5_HDR = 224, + MTL_PIXEL_FORMAT_ASTC_6x5_HDR = 225, + MTL_PIXEL_FORMAT_ASTC_6x6_HDR = 226, + MTL_PIXEL_FORMAT_ASTC_8x5_HDR = 228, + MTL_PIXEL_FORMAT_ASTC_8x6_HDR = 229, + MTL_PIXEL_FORMAT_ASTC_8x8_HDR = 230, + MTL_PIXEL_FORMAT_ASTC_10x5_HDR = 231, + MTL_PIXEL_FORMAT_ASTC_10x6_HDR = 232, + MTL_PIXEL_FORMAT_ASTC_10x8_HDR = 233, + MTL_PIXEL_FORMAT_ASTC_10x10_HDR = 234, + MTL_PIXEL_FORMAT_ASTC_12x10_HDR = 235, + MTL_PIXEL_FORMAT_ASTC_12x12_HDR = 236, + + /* YUV */ + MTL_PIXEL_FORMAT_GBGR422 = 240, + MTL_PIXEL_FORMAT_BGRG422 = 241, + + /* DEPTH */ + MTL_PIXEL_FORMAT_Z16_UNORM = 250, + MTL_PIXEL_FORMAT_Z32_FLOAT = 252, + + /* STENCIL */ + MTL_PIXEL_FORMAT_S8_UINT = 253, + + /* DEPTH STENCIL */ + MTL_PIXEL_FORMAT_Z24_UNORM_S8_UINT = 255, + MTL_PIXEL_FORMAT_Z32_FLOAT_S8X24_UINT = 260, + + MTL_PIXEL_FORMAT_X32_S8X24_UINT = 261, + MTL_PIXEL_FORMAT_X24_S8_UINT = 262, +}; + +#endif /* MTL_FORMAT_H */ diff --git a/src/kosmickrisp/bridge/mtl_heap.h 
b/src/kosmickrisp/bridge/mtl_heap.h new file mode 100644 index 00000000000..fd34ac4ac5b --- /dev/null +++ b/src/kosmickrisp/bridge/mtl_heap.h @@ -0,0 +1,30 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef MTL_HEAP_H +#define MTL_HEAP_H 1 + +#include "mtl_types.h" + +#include <inttypes.h> + +/* TODO_KOSMICKRISP We should move this struct to the bridge side. */ +struct kk_image_layout; + +/* Creation */ +mtl_heap *mtl_new_heap(mtl_device *device, uint64_t size, + enum mtl_resource_options resource_options); + +/* Utils */ +uint64_t mtl_heap_get_size(mtl_heap *heap); + +/* Allocation from heap */ +mtl_buffer *mtl_new_buffer_with_length(mtl_heap *heap, uint64_t size_B, + uint64_t offset_B); +mtl_texture *mtl_new_texture_with_descriptor( + mtl_heap *heap, const struct kk_image_layout *layout, uint64_t offset); + +#endif /* MTL_HEAP_H */
\ No newline at end of file diff --git a/src/kosmickrisp/bridge/mtl_heap.m b/src/kosmickrisp/bridge/mtl_heap.m new file mode 100644 index 00000000000..c658c0253ec --- /dev/null +++ b/src/kosmickrisp/bridge/mtl_heap.m @@ -0,0 +1,83 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_heap.h" + +/* TODO_KOSMICKRISP Remove */ +#include "kk_private.h" +#include "kk_image_layout.h" + +#include <Metal/MTLHeap.h> + +/* Creation */ +mtl_heap * +mtl_new_heap(mtl_device *device, uint64_t size, + enum mtl_resource_options resource_options) +{ + @autoreleasepool { + id<MTLDevice> dev = (id<MTLDevice>)device; + MTLHeapDescriptor *descriptor = [[MTLHeapDescriptor new] autorelease]; + descriptor.type = MTLHeapTypePlacement; + descriptor.resourceOptions = (MTLResourceOptions)resource_options; + descriptor.size = size; + descriptor.sparsePageSize = MTLSparsePageSize16; + return [dev newHeapWithDescriptor:descriptor]; + } +} + +/* Utils */ +uint64_t +mtl_heap_get_size(mtl_heap *heap) +{ + @autoreleasepool { + id<MTLHeap> hp = (id<MTLHeap>)heap; + return hp.size; + } +} + +static MTLTextureDescriptor * +mtl_new_texture_descriptor(const struct kk_image_layout *layout) +{ + @autoreleasepool { + MTLTextureDescriptor *descriptor = [MTLTextureDescriptor new]; + descriptor.textureType = (MTLTextureType)layout->type; + descriptor.pixelFormat = layout->format.mtl; + descriptor.width = layout->width_px; + descriptor.height = layout->height_px; + descriptor.depth = layout->depth_px; + descriptor.mipmapLevelCount = layout->levels; + descriptor.sampleCount = layout->sample_count_sa; + descriptor.arrayLength = layout->layers; + descriptor.allowGPUOptimizedContents = layout->optimized_layout; + descriptor.usage = (MTLTextureUsage)layout->usage; + /* We don't set the swizzle because Metal complains when the usage has store or render target with swizzle... */ + + return descriptor; + } +} + +/* Allocation from heap */ +mtl_buffer * +mtl_new_buffer_with_length(mtl_heap *heap, uint64_t size_B, uint64_t offset_B) +{ + @autoreleasepool { + id<MTLHeap> hp = (id<MTLHeap>)heap; + return (mtl_buffer *)[hp newBufferWithLength:size_B options:KK_MTL_RESOURCE_OPTIONS offset:offset_B]; + } +} + +mtl_texture * +mtl_new_texture_with_descriptor(mtl_heap *heap, + const struct kk_image_layout *layout, + uint64_t offset) +{ + @autoreleasepool { + id<MTLHeap> hp = (id<MTLHeap>)heap; + MTLTextureDescriptor *descriptor = [mtl_new_texture_descriptor(layout) autorelease]; + descriptor.resourceOptions = hp.resourceOptions; + return [hp newTextureWithDescriptor:descriptor offset:offset]; + } +} diff --git a/src/kosmickrisp/bridge/mtl_library.h b/src/kosmickrisp/bridge/mtl_library.h new file mode 100644 index 00000000000..32bdd8a5529 --- /dev/null +++ b/src/kosmickrisp/bridge/mtl_library.h @@ -0,0 +1,16 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef MTL_LIBRARY_H +#define MTL_LIBRARY_H 1 + +#include "mtl_types.h" + +mtl_library *mtl_new_library(mtl_device *device, const char *src); +mtl_function *mtl_new_function_with_name(mtl_library *lib, + const char *entry_point); + +#endif /* MTL_LIBRARY_H */ diff --git a/src/kosmickrisp/bridge/mtl_library.m b/src/kosmickrisp/bridge/mtl_library.m new file mode 100644 index 00000000000..a40cea9619b --- /dev/null +++ b/src/kosmickrisp/bridge/mtl_library.m @@ -0,0 +1,43 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_library.h" + +#include <Metal/MTLDevice.h> + +mtl_library * +mtl_new_library(mtl_device *device, const char *src) +{ + @autoreleasepool { + id<MTLDevice> dev = (id<MTLDevice>)device; + id<MTLLibrary> lib = NULL; + NSString *nsstr = [NSString stringWithCString:src encoding:NSASCIIStringEncoding]; + NSError *error; + MTLCompileOptions *comp_opts = [MTLCompileOptions new]; + comp_opts.languageVersion = MTLLanguageVersion3_2; + comp_opts.mathMode = MTLMathModeSafe; + comp_opts.mathFloatingPointFunctions = MTLMathFloatingPointFunctionsPrecise; + lib = [dev newLibraryWithSource:nsstr options:comp_opts error:&error]; + + if (error != nil) { + fprintf(stderr, "Failed to create MTLLibrary: %s\n", [error.localizedDescription UTF8String]); + } + + [comp_opts release]; + return lib; + } +} + +mtl_function * +mtl_new_function_with_name(mtl_library *lib, const char *entry_point) +{ + @autoreleasepool { + id<MTLLibrary> mtl_lib = (id<MTLLibrary>)lib; + NSString *ns_entry_point = [NSString stringWithCString:entry_point encoding:NSASCIIStringEncoding]; + return [mtl_lib newFunctionWithName:ns_entry_point]; + } +} + diff --git a/src/kosmickrisp/bridge/mtl_render_state.h b/src/kosmickrisp/bridge/mtl_render_state.h new file mode 100644 index 00000000000..cf13b4678f8 --- /dev/null +++ b/src/kosmickrisp/bridge/mtl_render_state.h @@ -0,0 +1,165 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef MTL_RENDER_STATE_H +#define MTL_RENDER_STATE_H 1 + +#include "mtl_types.h" + +#include <inttypes.h> +#include <stdbool.h> + +/* Bridge enums */ +enum mtl_pixel_format; + +/* TODO_KOSMICKRISP Remove */ +enum VkCompareOp; +enum VkStencilOp; + +/* Render pass descriptor */ +mtl_render_pass_descriptor *mtl_new_render_pass_descriptor(void); + +mtl_render_pass_attachment_descriptor * +mtl_render_pass_descriptor_get_color_attachment( + mtl_render_pass_descriptor *descriptor, uint32_t index); + +mtl_render_pass_attachment_descriptor * +mtl_render_pass_descriptor_get_depth_attachment( + mtl_render_pass_descriptor *descriptor); + +mtl_render_pass_attachment_descriptor * +mtl_render_pass_descriptor_get_stencil_attachment( + mtl_render_pass_descriptor *descriptor); + +void mtl_render_pass_attachment_descriptor_set_texture( + mtl_render_pass_attachment_descriptor *descriptor, mtl_texture *texture); + +void mtl_render_pass_attachment_descriptor_set_level( + mtl_render_pass_attachment_descriptor *descriptor, uint32_t level); + +void mtl_render_pass_attachment_descriptor_set_slice( + mtl_render_pass_attachment_descriptor *descriptor, uint32_t slice); + +void mtl_render_pass_attachment_descriptor_set_load_action( + mtl_render_pass_attachment_descriptor *descriptor, + enum mtl_load_action action); + +void mtl_render_pass_attachment_descriptor_set_store_action( + mtl_render_pass_attachment_descriptor *descriptor, + enum mtl_store_action action); + +void mtl_render_pass_attachment_descriptor_set_clear_color( + mtl_render_pass_attachment_descriptor *descriptor, + struct mtl_clear_color clear_color); + +void mtl_render_pass_attachment_descriptor_set_clear_depth( + mtl_render_pass_attachment_descriptor *descriptor, double depth); + +void mtl_render_pass_attachment_descriptor_set_clear_stencil( + mtl_render_pass_attachment_descriptor *descriptor, uint32_t stencil); + +void mtl_render_pass_descriptor_set_render_target_array_length( + mtl_render_pass_descriptor *descriptor, uint32_t length); + 
+void mtl_render_pass_descriptor_set_render_target_width( + mtl_render_pass_descriptor *descriptor, uint32_t width); + +void mtl_render_pass_descriptor_set_render_target_height( + mtl_render_pass_descriptor *descriptor, uint32_t height); + +void mtl_render_pass_descriptor_set_default_raster_sample_count( + mtl_render_pass_descriptor *descriptor, uint32_t sample_count); + +void mtl_render_pass_descriptor_set_visibility_buffer( + mtl_render_pass_descriptor *descriptor, mtl_buffer *visibility_buffer); + +/* Render pipeline descriptor */ +mtl_render_pipeline_descriptor *mtl_new_render_pipeline_descriptor(void); + +void mtl_render_pipeline_descriptor_set_vertex_shader( + mtl_render_pass_descriptor *descriptor, mtl_function *shader); + +void mtl_render_pipeline_descriptor_set_fragment_shader( + mtl_render_pass_descriptor *descriptor, mtl_function *shader); + +void mtl_render_pipeline_descriptor_set_input_primitive_topology( + mtl_render_pass_descriptor *descriptor, + enum mtl_primitive_topology_class topology_class); + +void mtl_render_pipeline_descriptor_set_color_attachment_format( + mtl_render_pass_descriptor *descriptor, uint8_t index, + enum mtl_pixel_format format); + +void mtl_render_pipeline_descriptor_set_depth_attachment_format( + mtl_render_pass_descriptor *descriptor, enum mtl_pixel_format format); + +void mtl_render_pipeline_descriptor_set_stencil_attachment_format( + mtl_render_pass_descriptor *descriptor, enum mtl_pixel_format format); + +void mtl_render_pipeline_descriptor_set_raster_sample_count( + mtl_render_pass_descriptor *descriptor, uint32_t sample_count); + +void mtl_render_pipeline_descriptor_set_alpha_to_coverage( + mtl_render_pass_descriptor *descriptor, bool enabled); + +void mtl_render_pipeline_descriptor_set_alpha_to_one( + mtl_render_pass_descriptor *descriptor, bool enabled); + +void mtl_render_pipeline_descriptor_set_rasterization_enabled( + mtl_render_pass_descriptor *descriptor, bool enabled); + +void mtl_render_pipeline_descriptor_set_max_vertex_amplification_count( + mtl_render_pass_descriptor *descriptor, uint32_t count); + +/* Render pipeline */ +mtl_render_pipeline_state * +mtl_new_render_pipeline(mtl_device *device, + mtl_render_pass_descriptor *descriptor); + +/* Stencil descriptor */ +mtl_stencil_descriptor *mtl_new_stencil_descriptor(void); + +void mtl_stencil_descriptor_set_stencil_failure_operation( + mtl_stencil_descriptor *descriptor, enum VkStencilOp op); + +void mtl_stencil_descriptor_set_depth_failure_operation( + mtl_stencil_descriptor *descriptor, enum VkStencilOp op); + +void mtl_stencil_descriptor_set_depth_stencil_pass_operation( + mtl_stencil_descriptor *descriptor, enum VkStencilOp op); + +void mtl_stencil_descriptor_set_stencil_compare_function( + mtl_stencil_descriptor *descriptor, enum VkCompareOp op); + +void mtl_stencil_descriptor_set_read_mask(mtl_stencil_descriptor *descriptor, + uint32_t mask); + +void mtl_stencil_descriptor_set_write_mask(mtl_stencil_descriptor *descriptor, + uint32_t mask); + +/* Depth stencil descriptor */ +mtl_depth_stencil_descriptor *mtl_new_depth_stencil_descriptor(void); + +void mtl_depth_stencil_descriptor_set_depth_compare_function( + mtl_depth_stencil_descriptor *descriptor, enum VkCompareOp op); + +void mtl_depth_stencil_descriptor_set_depth_write_enabled( + mtl_depth_stencil_descriptor *descriptor, bool enable_write); + +void mtl_depth_stencil_descriptor_set_back_face_stencil( + mtl_depth_stencil_descriptor *descriptor, + mtl_stencil_descriptor *stencil_descriptor); + +void 
mtl_depth_stencil_descriptor_set_front_face_stencil( + mtl_depth_stencil_descriptor *descriptor, + mtl_stencil_descriptor *stencil_descriptor); + +/* Depth stencil state */ +mtl_depth_stencil_state * +mtl_new_depth_stencil_state(mtl_device *device, + mtl_depth_stencil_descriptor *descriptor); + +#endif /* MTL_RENDER_STATE_H */ diff --git a/src/kosmickrisp/bridge/mtl_render_state.m b/src/kosmickrisp/bridge/mtl_render_state.m new file mode 100644 index 00000000000..0a5051c286a --- /dev/null +++ b/src/kosmickrisp/bridge/mtl_render_state.m @@ -0,0 +1,475 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_render_state.h" + +#include "mtl_format.h" + +/* TODO_KOSMICKRISP Remove */ +#include "vk_to_mtl_map.h" + +/* TODO_KOSMICKRISP Remove */ +#include "vulkan/vulkan.h" + +#include <Metal/MTLRenderPass.h> +#include <Metal/MTLRenderPipeline.h> +#include <Metal/MTLDepthStencil.h> + +/* Render pass descriptor */ +mtl_render_pass_descriptor * +mtl_new_render_pass_descriptor(void) +{ + @autoreleasepool { + return [[MTLRenderPassDescriptor renderPassDescriptor] retain]; + } +} + +mtl_render_pass_attachment_descriptor * +mtl_render_pass_descriptor_get_color_attachment( + mtl_render_pass_descriptor *descriptor, uint32_t index) +{ + @autoreleasepool { + MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor; + return desc.colorAttachments[index]; + } +} + +mtl_render_pass_attachment_descriptor * +mtl_render_pass_descriptor_get_depth_attachment( + mtl_render_pass_descriptor *descriptor) +{ + @autoreleasepool { + MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor; + return desc.depthAttachment; + } +} + +mtl_render_pass_attachment_descriptor * +mtl_render_pass_descriptor_get_stencil_attachment( + mtl_render_pass_descriptor *descriptor) +{ + @autoreleasepool { + MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor; + return desc.stencilAttachment; + } +} + +void +mtl_render_pass_attachment_descriptor_set_texture( + mtl_render_pass_attachment_descriptor *descriptor, mtl_texture *texture) +{ + @autoreleasepool { + MTLRenderPassAttachmentDescriptor *desc = (MTLRenderPassAttachmentDescriptor *)descriptor; + desc.texture = (id<MTLTexture>)texture; + } +} + +void +mtl_render_pass_attachment_descriptor_set_level( + mtl_render_pass_attachment_descriptor *descriptor, uint32_t level) +{ + @autoreleasepool { + MTLRenderPassAttachmentDescriptor *desc = (MTLRenderPassAttachmentDescriptor *)descriptor; + desc.level = level; + } +} + +void +mtl_render_pass_attachment_descriptor_set_slice( + mtl_render_pass_attachment_descriptor *descriptor, uint32_t slice) +{ + @autoreleasepool { + MTLRenderPassAttachmentDescriptor *desc = (MTLRenderPassAttachmentDescriptor *)descriptor; + desc.slice = slice; + } +} + +void +mtl_render_pass_attachment_descriptor_set_load_action( + mtl_render_pass_attachment_descriptor *descriptor, + enum mtl_load_action action) +{ + @autoreleasepool { + MTLRenderPassAttachmentDescriptor *desc = (MTLRenderPassAttachmentDescriptor *)descriptor; + desc.loadAction = (MTLLoadAction)action; + } +} + +void +mtl_render_pass_attachment_descriptor_set_store_action( + mtl_render_pass_attachment_descriptor *descriptor, + enum mtl_store_action action) +{ + @autoreleasepool { + MTLRenderPassAttachmentDescriptor *desc = (MTLRenderPassAttachmentDescriptor *)descriptor; + desc.storeAction = (MTLStoreAction)action; + desc.storeActionOptions = MTLStoreActionOptionNone; /* TODO_KOSMICKRISP Maybe expose 
this? */ + } +} + +void +mtl_render_pass_attachment_descriptor_set_clear_color( + mtl_render_pass_attachment_descriptor *descriptor, + struct mtl_clear_color clear_color) +{ + @autoreleasepool { + MTLRenderPassColorAttachmentDescriptor *desc = (MTLRenderPassColorAttachmentDescriptor *)descriptor; + desc.clearColor = MTLClearColorMake(clear_color.red, clear_color.green, clear_color.blue, clear_color.alpha); + } +} + +void +mtl_render_pass_attachment_descriptor_set_clear_depth( + mtl_render_pass_attachment_descriptor *descriptor, double depth) +{ + @autoreleasepool { + MTLRenderPassDepthAttachmentDescriptor *desc = (MTLRenderPassDepthAttachmentDescriptor *)descriptor; + desc.clearDepth = depth; + } +} + +void +mtl_render_pass_attachment_descriptor_set_clear_stencil(mtl_render_pass_attachment_descriptor *descriptor, + uint32_t stencil) +{ + @autoreleasepool { + MTLRenderPassStencilAttachmentDescriptor *desc = (MTLRenderPassStencilAttachmentDescriptor *)descriptor; + desc.clearStencil = stencil; + } +} + +void +mtl_render_pass_descriptor_set_render_target_array_length(mtl_render_pass_descriptor *descriptor, + uint32_t length) +{ + @autoreleasepool { + MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor; + desc.renderTargetArrayLength = length; + } +} + +void +mtl_render_pass_descriptor_set_render_target_width(mtl_render_pass_descriptor *descriptor, + uint32_t width) +{ + @autoreleasepool { + MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor; + desc.renderTargetWidth = width; + } +} + +void +mtl_render_pass_descriptor_set_render_target_height(mtl_render_pass_descriptor *descriptor, + uint32_t height) +{ + @autoreleasepool { + MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor; + desc.renderTargetHeight = height; + } +} + +void +mtl_render_pass_descriptor_set_default_raster_sample_count(mtl_render_pass_descriptor *descriptor, + uint32_t sample_count) +{ + @autoreleasepool { + MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor; + desc.defaultRasterSampleCount = sample_count; + } +} + +void +mtl_render_pass_descriptor_set_visibility_buffer(mtl_render_pass_descriptor *descriptor, + mtl_buffer *visibility_buffer) +{ + @autoreleasepool { + MTLRenderPassDescriptor *desc = (MTLRenderPassDescriptor *)descriptor; + id<MTLBuffer> buffer = (id<MTLBuffer>)visibility_buffer; + desc.visibilityResultBuffer = buffer; + } +} + +/* Render pipeline descriptor */ +mtl_render_pipeline_descriptor * +mtl_new_render_pipeline_descriptor() +{ + @autoreleasepool { + return [[MTLRenderPipelineDescriptor alloc] init]; + } +} + +void +mtl_render_pipeline_descriptor_set_vertex_shader(mtl_render_pass_descriptor *descriptor, + mtl_function *shader) +{ + @autoreleasepool { + MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor; + desc.vertexFunction = (id<MTLFunction>)shader; + } +} + +void +mtl_render_pipeline_descriptor_set_fragment_shader(mtl_render_pass_descriptor *descriptor, + mtl_function *shader) +{ + @autoreleasepool { + MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor; + desc.fragmentFunction = (id<MTLFunction>)shader; + } +} + +void +mtl_render_pipeline_descriptor_set_input_primitive_topology(mtl_render_pass_descriptor *descriptor, + enum mtl_primitive_topology_class class) +{ + @autoreleasepool { + MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor; + desc.inputPrimitiveTopology = (MTLPrimitiveTopologyClass)class; + } +} + +void 
+mtl_render_pipeline_descriptor_set_color_attachment_format(mtl_render_pass_descriptor *descriptor, + uint8_t index, + enum mtl_pixel_format format) +{ + @autoreleasepool { + MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor; + desc.colorAttachments[index].pixelFormat = (MTLPixelFormat)format; + } +} + +void +mtl_render_pipeline_descriptor_set_depth_attachment_format(mtl_render_pass_descriptor *descriptor, + enum mtl_pixel_format format) +{ + @autoreleasepool { + MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor; + desc.depthAttachmentPixelFormat = (MTLPixelFormat)format; + } +} + +void +mtl_render_pipeline_descriptor_set_stencil_attachment_format(mtl_render_pass_descriptor *descriptor, + enum mtl_pixel_format format) +{ + @autoreleasepool { + MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor; + desc.stencilAttachmentPixelFormat = (MTLPixelFormat)format; + } +} + +void +mtl_render_pipeline_descriptor_set_raster_sample_count(mtl_render_pass_descriptor *descriptor, + uint32_t sample_count) +{ + @autoreleasepool { + MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor; + desc.rasterSampleCount = sample_count; + } +} + +void +mtl_render_pipeline_descriptor_set_alpha_to_coverage(mtl_render_pass_descriptor *descriptor, + bool enabled) +{ + @autoreleasepool { + MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor; + desc.alphaToCoverageEnabled = enabled; + } +} + +void +mtl_render_pipeline_descriptor_set_alpha_to_one(mtl_render_pass_descriptor *descriptor, + bool enabled) +{ + @autoreleasepool { + MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor; + desc.alphaToOneEnabled = enabled; + } +} + +void +mtl_render_pipeline_descriptor_set_rasterization_enabled(mtl_render_pass_descriptor *descriptor, + bool enabled) +{ + @autoreleasepool { + MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor; + desc.rasterizationEnabled = enabled; + } +} + +void +mtl_render_pipeline_descriptor_set_max_vertex_amplification_count( mtl_render_pass_descriptor *descriptor, + uint32_t count) +{ + @autoreleasepool { + MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor; + desc.maxVertexAmplificationCount = count; + } +} + +/* Render pipeline */ +mtl_render_pipeline_state * +mtl_new_render_pipeline(mtl_device *device, mtl_render_pass_descriptor *descriptor) +{ + @autoreleasepool { + MTLRenderPipelineDescriptor *desc = (MTLRenderPipelineDescriptor *)descriptor; + id<MTLDevice> dev = (id<MTLDevice>)device; + NSError *error = nil; + mtl_render_pipeline_state *pipeline = [dev newRenderPipelineStateWithDescriptor:desc error:&error]; + if (error != nil) { + fprintf(stderr, "Failed to create MTLLibrary: %s\n", [error.localizedDescription UTF8String]); + } + + return pipeline; + } +} + +/* Stencil descriptor */ +mtl_stencil_descriptor * +mtl_new_stencil_descriptor() +{ + @autoreleasepool { + return [[MTLStencilDescriptor new] init]; + } +} + +/* TODO_KOSMICKRISP Move this to map */ +static MTLStencilOperation +map_vk_stencil_op_to_mtl_stencil_operation(VkStencilOp op) +{ + switch (op) { + case VK_STENCIL_OP_KEEP: + return MTLStencilOperationKeep; + case VK_STENCIL_OP_ZERO: + return MTLStencilOperationZero; + case VK_STENCIL_OP_REPLACE: + return MTLStencilOperationReplace; + case VK_STENCIL_OP_INCREMENT_AND_CLAMP: + return MTLStencilOperationIncrementClamp; + case VK_STENCIL_OP_DECREMENT_AND_CLAMP: + return 
MTLStencilOperationDecrementClamp; + case VK_STENCIL_OP_INVERT: + return MTLStencilOperationInvert; + case VK_STENCIL_OP_INCREMENT_AND_WRAP: + return MTLStencilOperationIncrementWrap; + case VK_STENCIL_OP_DECREMENT_AND_WRAP: + return MTLStencilOperationDecrementWrap; + default: + assert(false && "Unsupported VkStencilOp"); + return MTLStencilOperationZero; + }; +} + +void +mtl_stencil_descriptor_set_stencil_failure_operation(mtl_stencil_descriptor *descriptor, VkStencilOp op) +{ + @autoreleasepool { + MTLStencilDescriptor *desc = (MTLStencilDescriptor *)descriptor; + desc.stencilFailureOperation = map_vk_stencil_op_to_mtl_stencil_operation(op); + } +} + +void +mtl_stencil_descriptor_set_depth_failure_operation(mtl_stencil_descriptor *descriptor, VkStencilOp op) +{ + @autoreleasepool { + MTLStencilDescriptor *desc = (MTLStencilDescriptor *)descriptor; + desc.depthFailureOperation = map_vk_stencil_op_to_mtl_stencil_operation(op); + } +} + +void +mtl_stencil_descriptor_set_depth_stencil_pass_operation(mtl_stencil_descriptor *descriptor, VkStencilOp op) +{ + @autoreleasepool { + MTLStencilDescriptor *desc = (MTLStencilDescriptor *)descriptor; + desc.depthStencilPassOperation = map_vk_stencil_op_to_mtl_stencil_operation(op); + } +} + +void +mtl_stencil_descriptor_set_stencil_compare_function(mtl_stencil_descriptor *descriptor, VkCompareOp op) +{ + @autoreleasepool { + MTLStencilDescriptor *desc = (MTLStencilDescriptor *)descriptor; + desc.stencilCompareFunction = (MTLCompareFunction)vk_compare_op_to_mtl_compare_function(op); + } +} + +void +mtl_stencil_descriptor_set_read_mask(mtl_stencil_descriptor *descriptor, uint32_t mask) +{ + @autoreleasepool { + MTLStencilDescriptor *desc = (MTLStencilDescriptor *)descriptor; + desc.readMask = mask; + } +} + +void +mtl_stencil_descriptor_set_write_mask(mtl_stencil_descriptor *descriptor, uint32_t mask) +{ + @autoreleasepool { + MTLStencilDescriptor *desc = (MTLStencilDescriptor *)descriptor; + desc.writeMask = mask; + } +} + +/* Depth stencil descriptor */ +mtl_depth_stencil_descriptor * +mtl_new_depth_stencil_descriptor() +{ + @autoreleasepool { + return [[MTLDepthStencilDescriptor new] init]; + } +} + +void +mtl_depth_stencil_descriptor_set_depth_compare_function(mtl_depth_stencil_descriptor *descriptor, VkCompareOp op) +{ + @autoreleasepool { + MTLDepthStencilDescriptor *desc = (MTLDepthStencilDescriptor *)descriptor; + desc.depthCompareFunction = (MTLCompareFunction)vk_compare_op_to_mtl_compare_function(op); + } +} + +void +mtl_depth_stencil_descriptor_set_depth_write_enabled(mtl_depth_stencil_descriptor *descriptor, bool enable_write) +{ + @autoreleasepool { + MTLDepthStencilDescriptor *desc = (MTLDepthStencilDescriptor *)descriptor; + desc.depthWriteEnabled = enable_write; + } +} + +void +mtl_depth_stencil_descriptor_set_back_face_stencil(mtl_depth_stencil_descriptor *descriptor, mtl_stencil_descriptor *stencil_descriptor) +{ + @autoreleasepool { + MTLDepthStencilDescriptor *desc = (MTLDepthStencilDescriptor *)descriptor; + desc.backFaceStencil = (MTLStencilDescriptor *)stencil_descriptor; + } +} + +void +mtl_depth_stencil_descriptor_set_front_face_stencil(mtl_depth_stencil_descriptor *descriptor, mtl_stencil_descriptor *stencil_descriptor) +{ + @autoreleasepool { + MTLDepthStencilDescriptor *desc = (MTLDepthStencilDescriptor *)descriptor; + desc.frontFaceStencil = (MTLStencilDescriptor *)stencil_descriptor; + } +} + +mtl_depth_stencil_state * +mtl_new_depth_stencil_state(mtl_device *device, mtl_depth_stencil_descriptor *descriptor) +{ + 
@autoreleasepool { + id<MTLDevice> dev = (id<MTLDevice>)device; + MTLDepthStencilDescriptor *desc = (MTLDepthStencilDescriptor *)descriptor; + return [dev newDepthStencilStateWithDescriptor:desc]; + } +} diff --git a/src/kosmickrisp/bridge/mtl_sampler.h b/src/kosmickrisp/bridge/mtl_sampler.h new file mode 100644 index 00000000000..ab751195b2c --- /dev/null +++ b/src/kosmickrisp/bridge/mtl_sampler.h @@ -0,0 +1,50 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef MTL_SAMPLER_H +#define MTL_SAMPLER_H 1 + +#include "mtl_types.h" + +#include <inttypes.h> +#include <stdbool.h> + +/* Sampler descriptor */ +mtl_sampler_descriptor *mtl_new_sampler_descriptor(void); + +/* Sampler descriptor utils */ +void mtl_sampler_descriptor_set_normalized_coordinates( + mtl_sampler_descriptor *descriptor, bool normalized_coordinates); +void mtl_sampler_descriptor_set_address_mode( + mtl_sampler_descriptor *descriptor, + enum mtl_sampler_address_mode address_mode_u, + enum mtl_sampler_address_mode address_mode_v, + enum mtl_sampler_address_mode address_mode_w); +void +mtl_sampler_descriptor_set_border_color(mtl_sampler_descriptor *descriptor, + enum mtl_sampler_border_color color); +void +mtl_sampler_descriptor_set_filters(mtl_sampler_descriptor *descriptor, + enum mtl_sampler_min_mag_filter min_filter, + enum mtl_sampler_min_mag_filter mag_filter, + enum mtl_sampler_mip_filter mip_filter); +void mtl_sampler_descriptor_set_lod_clamp(mtl_sampler_descriptor *descriptor, + float min, float max); +void +mtl_sampler_descriptor_set_max_anisotropy(mtl_sampler_descriptor *descriptor, + uint64_t max); +void +mtl_sampler_descriptor_set_compare_function(mtl_sampler_descriptor *descriptor, + enum mtl_compare_function function); + +/* Sampler */ +mtl_sampler *mtl_new_sampler(mtl_device *device, + mtl_sampler_descriptor *descriptor); + +/* Sampler utils */ +uint64_t mtl_sampler_get_gpu_resource_id(mtl_sampler *sampler); + +#endif /* MTL_SAMPLER_H */
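mtl_sampler.h mirrors Metal's descriptor/object split: a transient sampler descriptor is filled in through the setters above and then baked into an immutable sampler state. A hedged sketch of that flow follows; it is not part of the patch, the example_make_sampler name is made up, and the filter/address choices are arbitrary.

/* Illustrative sampler creation through the bridge; not part of the patch. */
#include "mtl_bridge.h"

static mtl_sampler *
example_make_sampler(mtl_device *dev)
{
   mtl_sampler_descriptor *desc = mtl_new_sampler_descriptor();
   if (!desc)
      return NULL;

   mtl_sampler_descriptor_set_normalized_coordinates(desc, true);
   mtl_sampler_descriptor_set_address_mode(desc,
                                           MTL_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
                                           MTL_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
                                           MTL_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE);
   mtl_sampler_descriptor_set_filters(desc,
                                      MTL_SAMPLER_MIN_MAG_FILTER_NEAREST,
                                      MTL_SAMPLER_MIN_MAG_FILTER_NEAREST,
                                      MTL_SAMPLER_MIP_FILTER_NOT_MIP_MAPPED);
   mtl_sampler_descriptor_set_lod_clamp(desc, 0.0f, 0.0f);
   mtl_sampler_descriptor_set_max_anisotropy(desc, 1);

   mtl_sampler *sampler = mtl_new_sampler(dev, desc);
   mtl_release(desc);

   /* mtl_sampler_get_gpu_resource_id() yields the 64-bit handle the driver
    * can write into argument buffers for bindless access. */
   return sampler;
}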
\ No newline at end of file diff --git a/src/kosmickrisp/bridge/mtl_sampler.m b/src/kosmickrisp/bridge/mtl_sampler.m new file mode 100644 index 00000000000..f7ddb66b7e1 --- /dev/null +++ b/src/kosmickrisp/bridge/mtl_sampler.m @@ -0,0 +1,118 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_sampler.h" + +#include <Metal/MTLSampler.h> + +mtl_sampler_descriptor * +mtl_new_sampler_descriptor() +{ + @autoreleasepool { + MTLSamplerDescriptor *descriptor = [MTLSamplerDescriptor new]; + /* Set common variables we don't expose */ + descriptor.lodAverage = false; + descriptor.supportArgumentBuffers = true; + return descriptor; + } +} + +void +mtl_sampler_descriptor_set_normalized_coordinates(mtl_sampler_descriptor *descriptor, bool normalized_coordinates) +{ + @autoreleasepool { + MTLSamplerDescriptor *desc = (MTLSamplerDescriptor *)descriptor; + desc.normalizedCoordinates = normalized_coordinates; + } +} + +void +mtl_sampler_descriptor_set_address_mode(mtl_sampler_descriptor *descriptor, + enum mtl_sampler_address_mode address_mode_u, + enum mtl_sampler_address_mode address_mode_v, + enum mtl_sampler_address_mode address_mode_w) +{ + @autoreleasepool { + MTLSamplerDescriptor *desc = (MTLSamplerDescriptor *)descriptor; + desc.sAddressMode = (MTLSamplerAddressMode)address_mode_u; + desc.tAddressMode = (MTLSamplerAddressMode)address_mode_v; + desc.rAddressMode = (MTLSamplerAddressMode)address_mode_w; + } +} + +void +mtl_sampler_descriptor_set_border_color(mtl_sampler_descriptor *descriptor, enum mtl_sampler_border_color color) +{ + @autoreleasepool { + MTLSamplerDescriptor *desc = (MTLSamplerDescriptor *)descriptor; + desc.borderColor = (MTLSamplerBorderColor)color; + } +} + +void +mtl_sampler_descriptor_set_filters(mtl_sampler_descriptor *descriptor, + enum mtl_sampler_min_mag_filter min_filter, + enum mtl_sampler_min_mag_filter mag_filter, + enum mtl_sampler_mip_filter mip_filter) +{ + @autoreleasepool { + MTLSamplerDescriptor *desc = (MTLSamplerDescriptor *)descriptor; + desc.minFilter = (MTLSamplerMinMagFilter)min_filter; + desc.magFilter = (MTLSamplerMinMagFilter)mag_filter; + desc.mipFilter = (MTLSamplerMipFilter)mip_filter; + } +} + +void +mtl_sampler_descriptor_set_lod_clamp(mtl_sampler_descriptor *descriptor, + float min, + float max) +{ + @autoreleasepool { + MTLSamplerDescriptor *desc = (MTLSamplerDescriptor *)descriptor; + desc.lodMinClamp = min; + desc.lodMaxClamp = max; + } +} + +void +mtl_sampler_descriptor_set_max_anisotropy(mtl_sampler_descriptor *descriptor, + uint64_t max) +{ + @autoreleasepool { + MTLSamplerDescriptor *desc = (MTLSamplerDescriptor *)descriptor; + desc.maxAnisotropy = max ? 
max : 1u; /* Metal requires a non-zero value */ + } +} + +void +mtl_sampler_descriptor_set_compare_function(mtl_sampler_descriptor *descriptor, + enum mtl_compare_function function) +{ + @autoreleasepool { + MTLSamplerDescriptor *desc = (MTLSamplerDescriptor *)descriptor; + desc.compareFunction = (MTLCompareFunction)function; + } +} + +mtl_sampler * +mtl_new_sampler(mtl_device *device, mtl_sampler_descriptor *descriptor) +{ + @autoreleasepool { + id<MTLDevice> dev = (id<MTLDevice>)device; + MTLSamplerDescriptor *desc = (MTLSamplerDescriptor *)descriptor; + return [dev newSamplerStateWithDescriptor:desc]; + } +} + +uint64_t +mtl_sampler_get_gpu_resource_id(mtl_sampler *sampler) +{ + @autoreleasepool { + id<MTLSamplerState> samp = (id<MTLSamplerState>)sampler; + return [samp gpuResourceID]._impl; + } +} diff --git a/src/kosmickrisp/bridge/mtl_sync.h b/src/kosmickrisp/bridge/mtl_sync.h new file mode 100644 index 00000000000..8df0057cd75 --- /dev/null +++ b/src/kosmickrisp/bridge/mtl_sync.h @@ -0,0 +1,29 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef MTL_SYNC_H +#define MTL_SYNC_H 1 + +#include "mtl_types.h" + +#include <inttypes.h> + +/* MTLFence */ +mtl_fence *mtl_new_fence(mtl_device *device); + +/* MTLEvent */ +mtl_event *mtl_new_event(mtl_device *device); + +/* MTLSharedEvent */ +mtl_shared_event *mtl_new_shared_event(mtl_device *device); +int mtl_shared_event_wait_until_signaled_value(mtl_shared_event *event_handle, + uint64_t value, + uint64_t timeout_ms); +uint64_t mtl_shared_event_get_signaled_value(mtl_shared_event *event_handle); +void mtl_shared_event_set_signaled_value(mtl_shared_event *event_handle, + uint64_t value); + +#endif /* MTL_SYNC_H */
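MTLSharedEvent is the one primitive here whose payload is visible to both CPU and GPU, which is what makes it a natural backing for Vulkan timeline semaphores: waits and signals are expressed against a monotonically increasing 64-bit value. The sketch below is illustrative only; example_host_signal_and_wait is a made-up name, and both the signal and the wait are done on the host purely to show the API shape.

/* Illustrative host-side use of the shared-event wrappers; not part of the patch. */
#include "mtl_bridge.h"

#include <stdbool.h>

static bool
example_host_signal_and_wait(mtl_device *dev, uint64_t value, uint64_t timeout_ms)
{
   mtl_shared_event *ev = mtl_new_shared_event(dev);
   if (!ev)
      return false;

   /* Host-side signal: bump the payload to the target value... */
   mtl_shared_event_set_signaled_value(ev, value);

   /* ...and host-side wait: returns non-zero once the payload reaches it. */
   bool reached = mtl_shared_event_wait_until_signaled_value(ev, value, timeout_ms) != 0;

   mtl_release(ev);
   return reached;
}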
\ No newline at end of file diff --git a/src/kosmickrisp/bridge/mtl_sync.m b/src/kosmickrisp/bridge/mtl_sync.m new file mode 100644 index 00000000000..d7f5b211318 --- /dev/null +++ b/src/kosmickrisp/bridge/mtl_sync.m @@ -0,0 +1,66 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_sync.h" + +#include <Metal/MTLEvent.h> + +/* MTLFence */ +mtl_fence * +mtl_new_fence(mtl_device *device) +{ + @autoreleasepool { + id<MTLDevice> dev = (id<MTLDevice>)device; + return (mtl_fence *)[dev newFence]; + } +} + +/* MTLEvent */ +mtl_event * +mtl_new_event(mtl_device *device) +{ + @autoreleasepool { + id<MTLDevice> dev = (id<MTLDevice>)device; + return [dev newEvent]; + } +} + +/* MTLSharedEvent */ +mtl_shared_event * +mtl_new_shared_event(mtl_device *device) +{ + @autoreleasepool { + id<MTLDevice> dev = (id<MTLDevice>)device; + return [dev newSharedEvent]; + } +} + +int +mtl_shared_event_wait_until_signaled_value(mtl_shared_event *event_handle, uint64_t value, uint64_t timeout_ms) +{ + @autoreleasepool { + id<MTLSharedEvent> event = (id<MTLSharedEvent>)event_handle; + return (int)[event waitUntilSignaledValue:value timeoutMS:timeout_ms]; + } +} + +void +mtl_shared_event_set_signaled_value(mtl_shared_event *event_handle, uint64_t value) +{ + @autoreleasepool { + id<MTLSharedEvent> event = (id<MTLSharedEvent>)event_handle; + event.signaledValue = value; + } +} + +uint64_t +mtl_shared_event_get_signaled_value(mtl_shared_event *event_handle) +{ + @autoreleasepool { + id<MTLSharedEvent> event = (id<MTLSharedEvent>)event_handle; + return event.signaledValue; + } +} diff --git a/src/kosmickrisp/bridge/mtl_texture.h b/src/kosmickrisp/bridge/mtl_texture.h new file mode 100644 index 00000000000..ab1de341b01 --- /dev/null +++ b/src/kosmickrisp/bridge/mtl_texture.h @@ -0,0 +1,27 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef MTL_TEXTURE_H +#define MTL_TEXTURE_H 1 + +#include "mtl_types.h" + +#include <inttypes.h> + +/* TODO_KOSMICKRISP Move this to bridge. */ +struct kk_view_layout; + +/* Utils*/ +uint64_t mtl_texture_get_gpu_resource_id(mtl_texture *texture); + +/* Texture view creation */ +mtl_texture *mtl_new_texture_view_with(mtl_texture *texture, + const struct kk_view_layout *layout); +mtl_texture * +mtl_new_texture_view_with_no_swizzle(mtl_texture *texture, + const struct kk_view_layout *layout); + +#endif /* MTL_TEXTURE_H */ diff --git a/src/kosmickrisp/bridge/mtl_texture.m b/src/kosmickrisp/bridge/mtl_texture.m new file mode 100644 index 00000000000..5042e7f2926 --- /dev/null +++ b/src/kosmickrisp/bridge/mtl_texture.m @@ -0,0 +1,94 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_texture.h" + +/* TODO_LUNARG Remove */ +#include "kk_image_layout.h" + +/* TODO_LUNARG Remove */ +#include "vulkan/vulkan.h" + +#include <Metal/MTLTexture.h> + +uint64_t +mtl_texture_get_gpu_resource_id(mtl_texture *texture) +{ + @autoreleasepool { + id<MTLTexture> tex = (id<MTLTexture>)texture; + return (uint64_t)[tex gpuResourceID]._impl; + } +} + +/* TODO_KOSMICKRISP This should be part of the mapping */ +static uint32_t +mtl_texture_view_type(uint32_t type, uint8_t sample_count) +{ + switch (type) { + case VK_IMAGE_VIEW_TYPE_1D: + return MTLTextureType1D; + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + return MTLTextureType1DArray; + case VK_IMAGE_VIEW_TYPE_2D: + return sample_count > 1u ? 
MTLTextureType2DMultisample : MTLTextureType2D;; + case VK_IMAGE_VIEW_TYPE_CUBE: + return MTLTextureTypeCube; + case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: + return MTLTextureTypeCubeArray; + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + return sample_count > 1u ? MTLTextureType2DMultisampleArray : MTLTextureType2DArray; + case VK_IMAGE_VIEW_TYPE_3D: + return MTLTextureType3D; + default: + assert(false && "Unsupported VkViewType"); + return MTLTextureType1D; + } +} + +static MTLTextureSwizzle +mtl_texture_swizzle(enum pipe_swizzle swizzle) +{ + const MTLTextureSwizzle map[] = + { + [PIPE_SWIZZLE_X] = MTLTextureSwizzleRed, + [PIPE_SWIZZLE_Y] = MTLTextureSwizzleGreen, + [PIPE_SWIZZLE_Z] = MTLTextureSwizzleBlue, + [PIPE_SWIZZLE_W] = MTLTextureSwizzleAlpha, + [PIPE_SWIZZLE_0] = MTLTextureSwizzleZero, + [PIPE_SWIZZLE_1] = MTLTextureSwizzleOne, + }; + + return map[swizzle]; +} + +mtl_texture * +mtl_new_texture_view_with(mtl_texture *texture, const struct kk_view_layout *layout) +{ + @autoreleasepool { + id<MTLTexture> tex = (id<MTLTexture>)texture; + MTLTextureType type = mtl_texture_view_type(layout->view_type, layout->sample_count_sa); + NSRange levels = NSMakeRange(layout->base_level, layout->num_levels); + NSRange slices = NSMakeRange(layout->base_array_layer, layout->array_len); + MTLTextureSwizzleChannels swizzle = MTLTextureSwizzleChannelsMake(mtl_texture_swizzle(layout->swizzle.red), + mtl_texture_swizzle(layout->swizzle.green), + mtl_texture_swizzle(layout->swizzle.blue), + mtl_texture_swizzle(layout->swizzle.alpha)); + return [tex newTextureViewWithPixelFormat:layout->format.mtl textureType:type levels:levels slices:slices swizzle:swizzle]; + } +} + +mtl_texture * +mtl_new_texture_view_with_no_swizzle(mtl_texture *texture, const struct kk_view_layout *layout) +{ + @autoreleasepool { + id<MTLTexture> tex = (id<MTLTexture>)texture; + MTLTextureType type = mtl_texture_view_type(layout->view_type, layout->sample_count_sa); + NSRange levels = NSMakeRange(layout->base_level, layout->num_levels); + NSRange slices = NSMakeRange(layout->base_array_layer, layout->array_len); + return [tex newTextureViewWithPixelFormat:layout->format.mtl textureType:type levels:levels slices:slices]; + } +} + diff --git a/src/kosmickrisp/bridge/mtl_types.h b/src/kosmickrisp/bridge/mtl_types.h new file mode 100644 index 00000000000..90bfbdb8315 --- /dev/null +++ b/src/kosmickrisp/bridge/mtl_types.h @@ -0,0 +1,272 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + * + * Structures and enums found in this file are a 1-1 mapping of Metal's + * equivalents + */ + +#ifndef KK_MTL_TYPES_H +#define KK_MTL_TYPES_H 1 + +#include <stddef.h> /* For size_t definition */ + +/** HANDLES */ +typedef void mtl_device; +typedef void mtl_heap; +typedef void mtl_buffer; +typedef void mtl_texture; +typedef void mtl_command_queue; +typedef void mtl_command_buffer; +typedef void mtl_command_encoder; +typedef void mtl_blit_encoder; +typedef void mtl_compute_encoder; +typedef void mtl_render_encoder; +typedef void mtl_event; +typedef void mtl_shared_event; +typedef void mtl_sampler_descriptor; +typedef void mtl_sampler; +typedef void mtl_compute_pipeline_state; +typedef void mtl_library; +typedef void mtl_render_pipeline_state; +typedef void mtl_function; +typedef void mtl_resource; +typedef void mtl_render_pass_descriptor; +typedef void mtl_render_pipeline_descriptor; +typedef void mtl_fence; +typedef void mtl_stencil_descriptor; +typedef void mtl_depth_stencil_descriptor; +typedef void mtl_depth_stencil_state; +typedef void mtl_render_pass_attachment_descriptor; + +/** ENUMS */ +enum mtl_cpu_cache_mode { + MTL_CPU_CACHE_MODE_DEFAULT_CACHE = 0, + MTL_CPU_CACHE_MODE_WRITE_COMBINED = 1, +}; + +enum mtl_storage_mode { + MTL_STORAGE_MODE_SHARED = 0, + MTL_STORAGE_MODE_MANAGED = 1, + MTL_STORAGE_MODE_PRIVATE = 2, + MTL_STORAGE_MODE_MEMORYLESS = 3, +}; + +enum mtl_hazard_tracking_mode { + MTL_HAZARD_TRACKING_MODE_DFEAULT = 0, + MTL_HAZARD_TRACKING_MODE_UNTRACKED = 1, + MTL_HAZARD_TRACKING_MODE_TRACKED = 2, +}; + +#define MTL_RESOURCE_CPU_CACHE_MODE_SHIFT 0 +#define MTL_RESOURCE_STORAGE_MODE_SHIFT 4 +#define MTL_RESOURCE_HAZARD_TRACKING_MODE_SHIFT 8 +enum mtl_resource_options { + MTL_RESOURCE_CPU_CACHE_MODE_DEFAULT_CACHE = + MTL_CPU_CACHE_MODE_DEFAULT_CACHE << MTL_RESOURCE_CPU_CACHE_MODE_SHIFT, + MTL_RESOURCE_CPU_CACHE_MODE_WRITE_COMBINED = + MTL_CPU_CACHE_MODE_WRITE_COMBINED << MTL_RESOURCE_CPU_CACHE_MODE_SHIFT, + MTL_RESOURCE_STORAGE_MODE_SHARED = MTL_STORAGE_MODE_SHARED + << MTL_RESOURCE_STORAGE_MODE_SHIFT, + MTL_RESOURCE_STORAGE_MODE_PRIVATE = MTL_STORAGE_MODE_PRIVATE + << MTL_RESOURCE_STORAGE_MODE_SHIFT, + MTL_RESOURCE_TRACKING_MODE_DEFAULT = + MTL_HAZARD_TRACKING_MODE_DFEAULT + << MTL_RESOURCE_HAZARD_TRACKING_MODE_SHIFT, + MTL_RESOURCE_TRACKING_MODE_UNTRACKED = + MTL_HAZARD_TRACKING_MODE_UNTRACKED + << MTL_RESOURCE_HAZARD_TRACKING_MODE_SHIFT, + MTL_RESOURCE_TRACKING_MODE_TRACKED = + MTL_HAZARD_TRACKING_MODE_TRACKED + << MTL_RESOURCE_HAZARD_TRACKING_MODE_SHIFT, +}; + +enum mtl_blit_options { + MTL_BLIT_OPTION_NONE = 0, + MTL_BLIT_OPTION_DEPTH_FROM_DEPTH_STENCIL = 1 << 0, + MTL_BLIT_OPTION_STENCIL_FROM_DEPTH_STENCIL = 1 << 1, +}; + +enum mtl_resource_usage { + MTL_RESOURCE_USAGE_READ = 1 << 0, + MTL_RESOURCE_USAGE_WRITE = 1 << 1, +}; + +enum mtl_primitive_type { + MTL_PRIMITIVE_TYPE_POINT = 0, + MTL_PRIMITIVE_TYPE_LINE = 1, + MTL_PRIMITIVE_TYPE_LINE_STRIP = 2, + MTL_PRIMITIVE_TYPE_TRIANGLE = 3, + MTL_PRIMITIVE_TYPE_TRIANGLE_STRIP = 4, +}; + +enum mtl_primitive_topology_class { + MTL_PRIMITIVE_TOPOLOGY_CLASS_UNSPECIFIED = 0, + MTL_PRIMITIVE_TOPOLOGY_CLASS_POINT = 1, + MTL_PRIMITIVE_TOPOLOGY_CLASS_LINE = 2, + MTL_PRIMITIVE_TOPOLOGY_CLASS_TRIANGLE = 3, +}; + +enum mtl_texture_type { + MTL_TEXTURE_TYPE_1D = 0u, + MTL_TEXTURE_TYPE_1D_ARRAY = 1u, + MTL_TEXTURE_TYPE_2D = 2u, + MTL_TEXTURE_TYPE_2D_ARRAY = 3u, + MTL_TEXTURE_TYPE_2D_MULTISAMPLE = 4u, + MTL_TEXTURE_TYPE_CUBE = 5u, + MTL_TEXTURE_TYPE_CUBE_ARRAY = 6u, + 
MTL_TEXTURE_TYPE_3D = 7u, + MTL_TEXTURE_TYPE_2D_ARRAY_MULTISAMPLE = 8u, + MTL_TEXTURE_TYPE_TEXTURE_BUFFER = 9u, +}; + +enum mtl_texture_usage { + MTL_TEXTURE_USAGE_UNKNOWN = 0x0000, + MTL_TEXTURE_USAGE_SHADER_READ = 0x0001, + MTL_TEXTURE_USAGE_SHADER_WRITE = 0X0002, + MTL_TEXTURE_USAGE_RENDER_TARGET = 0X0004, + MTL_TEXTURE_USAGE_PIXEL_FORMAT_VIEW = 0X0010, + MTL_TEXTURE_USAGE_SHADER_ATOMIC = 0X0020, +}; + +enum mtl_load_action { + MTL_LOAD_ACTION_DONT_CARE = 0u, + MTL_LOAD_ACTION_LOAD = 1u, + MTL_LOAD_ACTION_CLEAR = 2u, +}; + +enum mtl_store_action { + MTL_STORE_ACTION_DONT_CARE = 0u, + MTL_STORE_ACTION_STORE = 1u, + MTL_STORE_ACTION_MULTISAMPLE_RESOLVE = 2u, + MTL_STORE_ACTION_STORE_AND_MULTISAMPLE_RESOLVE = 3u, + MTL_STORE_ACTION_UNKNOWN = 4u, + MTL_STORE_ACTION_CUSTOM_SAMPLE_DEPTH_STORE = 5u, +}; + +enum mtl_texture_swizzle { + MTL_TEXTURE_SWIZZLE_ZERO = 0, + MTL_TEXTURE_SWIZZLE_ONE = 1, + MTL_TEXTURE_SWIZZLE_RED = 2, + MTL_TEXTURE_SWIZZLE_GREEN = 3, + MTL_TEXTURE_SWIZZLE_BLUE = 4, + MTL_TEXTURE_SWIZZLE_ALPHA = 5, +}; + +enum mtl_index_type { + MTL_INDEX_TYPE_UINT16 = 0, + MTL_INDEX_TYPE_UINT32 = 1, +}; + +enum mtl_sampler_address_mode { + MTL_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE = 0, + MTL_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE = 1, + MTL_SAMPLER_ADDRESS_MODE_REPEAT = 2, + MTL_SAMPLER_ADDRESS_MODE_MIRROR_REPEAT = 3, + MTL_SAMPLER_ADDRESS_MODE_CLAMP_TO_ZERO = 4, + MTL_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER_COLOR = 5, +}; + +enum mtl_sampler_border_color { + MTL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK = 0, + MTL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK = 1, + MTL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE = 2, +}; + +enum mtl_sampler_min_mag_filter { + MTL_SAMPLER_MIN_MAG_FILTER_NEAREST = 0, + MTL_SAMPLER_MIN_MAG_FILTER_LINEAR = 1, +}; + +enum mtl_sampler_mip_filter { + MTL_SAMPLER_MIP_FILTER_NOT_MIP_MAPPED = 0, + MTL_SAMPLER_MIP_FILTER_NEAREST = 1, + MTL_SAMPLER_MIP_FILTER_LINEAR = 2, +}; + +enum mtl_compare_function { + MTL_COMPARE_FUNCTION_NEVER = 0, + MTL_COMPARE_FUNCTION_LESS = 1, + MTL_COMPARE_FUNCTION_EQUAL = 2, + MTL_COMPARE_FUNCTION_LESS_EQUAL = 3, + MTL_COMPARE_FUNCTION_GREATER = 4, + MTL_COMPARE_FUNCTION_NOT_EQUAL = 5, + MTL_COMPARE_FUNCTION_GREATER_EQUAL = 6, + MTL_COMPARE_FUNCTION_ALWAYS = 7, +}; + +enum mtl_winding { + MTL_WINDING_CLOCKWISE = 0, + MTL_WINDING_COUNTER_CLOCKWISE = 1, +}; + +enum mtl_cull_mode { + MTL_CULL_MODE_NONE = 0, + MTL_CULL_MODE_FRONT = 1, + MTL_CULL_MODE_BACK = 2, +}; + +enum mtl_visibility_result_mode { + MTL_VISIBILITY_RESULT_MODE_DISABLED = 0, + MTL_VISIBILITY_RESULT_MODE_BOOLEAN = 1, + MTL_VISIBILITY_RESULT_MODE_COUNTING = 2, +}; + +enum mtl_depth_clip_mode { + MTL_DEPTH_CLIP_MODE_CLIP = 0, + MTL_DEPTH_CLIP_MODE_CLAMP = 1, +}; + +/** STRUCTURES */ +struct mtl_range { + size_t offset; + size_t length; +}; + +struct mtl_origin { + size_t x, y, z; +}; + +struct mtl_size { + size_t x, y, z; +}; + +struct mtl_viewport { + double originX, originY, width, height, znear, zfar; +}; + +struct mtl_clear_color { + union { + struct { + double red, green, blue, alpha; + }; + double channel[4]; + }; +}; + +struct mtl_scissor_rect { + size_t x, y, width, height; +}; + +struct mtl_texture_swizzle_channels { + enum mtl_texture_swizzle red; + enum mtl_texture_swizzle green; + enum mtl_texture_swizzle blue; + enum mtl_texture_swizzle alpha; +}; + +struct mtl_buffer_image_copy { + struct mtl_size image_size; + struct mtl_origin image_origin; + mtl_buffer *buffer; + mtl_texture *image; + size_t buffer_offset_B; + size_t buffer_stride_B; + size_t buffer_2d_image_size_B; + size_t 
image_slice; + size_t image_level; + enum mtl_blit_options options; +}; + +#endif /* KK_MTL_TYPES_H */ diff --git a/src/kosmickrisp/bridge/stubs/mtl_bridge.c b/src/kosmickrisp/bridge/stubs/mtl_bridge.c new file mode 100644 index 00000000000..10628fcf89c --- /dev/null +++ b/src/kosmickrisp/bridge/stubs/mtl_bridge.c @@ -0,0 +1,24 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_bridge.h" + +mtl_texture * +mtl_drawable_get_texture(void *drawable_ptr) +{ + return NULL; +} + +void * +mtl_retain(void *handle) +{ + return NULL; +} + +void +mtl_release(void *handle) +{ +} diff --git a/src/kosmickrisp/bridge/stubs/mtl_buffer.c b/src/kosmickrisp/bridge/stubs/mtl_buffer.c new file mode 100644 index 00000000000..ee98a5c88e2 --- /dev/null +++ b/src/kosmickrisp/bridge/stubs/mtl_buffer.c @@ -0,0 +1,33 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_buffer.h" + +uint64_t +mtl_buffer_get_length(mtl_buffer *buffer) +{ + return 0u; +} + +uint64_t +mtl_buffer_get_gpu_address(mtl_buffer *buffer) +{ + return 0u; +} + +void * +mtl_get_contents(mtl_buffer *buffer) +{ + return NULL; +} + +mtl_texture * +mtl_new_texture_with_descriptor_linear(mtl_buffer *buffer, + const struct kk_image_layout *layout, + uint64_t offset) +{ + return NULL; +} diff --git a/src/kosmickrisp/bridge/stubs/mtl_command_buffer.c b/src/kosmickrisp/bridge/stubs/mtl_command_buffer.c new file mode 100644 index 00000000000..ff36e37aa9d --- /dev/null +++ b/src/kosmickrisp/bridge/stubs/mtl_command_buffer.c @@ -0,0 +1,35 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_command_buffer.h" + +void +mtl_encode_signal_event(mtl_command_buffer *cmd_buf_handle, + mtl_event *event_handle, uint64_t value) +{ +} + +void +mtl_encode_wait_for_event(mtl_command_buffer *cmd_buf_handle, + mtl_event *event_handle, uint64_t value) +{ +} + +void +mtl_add_completed_handler(mtl_command_buffer *cmd, void (*callback)(void *data), + void *data) +{ +} + +void +mtl_command_buffer_commit(mtl_command_buffer *cmd_buf) +{ +} + +void +mtl_present_drawable(mtl_command_buffer *cmd_buf, void *drawable) +{ +}
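On the real (Darwin) side, mtl_add_completed_handler is how the driver learns that a committed command buffer has retired on the GPU; the stub above simply drops the callback. A hedged sketch of the intended call pattern follows; it is not part of the patch, the example_* names and payload struct are invented, and it assumes handles from mtl_new_* follow the bridge's usual caller-releases convention.

/* Illustrative submit + completion-callback flow; not part of the patch. */
#include "mtl_bridge.h"

struct example_submit {
   mtl_shared_event *timeline; /* signaled from the completion callback */
   uint64_t signal_value;
};

static void
example_on_complete(void *data)
{
   struct example_submit *submit = data;
   /* Advance the CPU-visible timeline once the command buffer retired. */
   mtl_shared_event_set_signaled_value(submit->timeline, submit->signal_value);
}

static void
example_submit(mtl_command_queue *queue, struct example_submit *submit)
{
   mtl_command_buffer *cmd = mtl_new_command_buffer(queue);
   if (!cmd)
      return;

   mtl_add_completed_handler(cmd, example_on_complete, submit);
   mtl_command_buffer_commit(cmd);

   /* Drop our reference; Metal keeps a committed command buffer alive until
    * its handlers have run. */
   mtl_release(cmd);
}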
\ No newline at end of file diff --git a/src/kosmickrisp/bridge/stubs/mtl_command_queue.c b/src/kosmickrisp/bridge/stubs/mtl_command_queue.c new file mode 100644 index 00000000000..aa39a3446ee --- /dev/null +++ b/src/kosmickrisp/bridge/stubs/mtl_command_queue.c @@ -0,0 +1,19 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_command_queue.h" + +mtl_command_queue * +mtl_new_command_queue(mtl_device *device, uint32_t cmd_buffer_count) +{ + return NULL; +} + +mtl_command_buffer * +mtl_new_command_buffer(mtl_command_queue *cmd_queue) +{ + return NULL; +} diff --git a/src/kosmickrisp/bridge/stubs/mtl_compute_state.c b/src/kosmickrisp/bridge/stubs/mtl_compute_state.c new file mode 100644 index 00000000000..f7a8a6aa26f --- /dev/null +++ b/src/kosmickrisp/bridge/stubs/mtl_compute_state.c @@ -0,0 +1,14 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_compute_state.h" + +mtl_compute_pipeline_state * +mtl_new_compute_pipeline_state(mtl_device *device, mtl_function *function, + uint64_t max_total_threads_per_threadgroup) +{ + return NULL; +} diff --git a/src/kosmickrisp/bridge/stubs/mtl_device.c b/src/kosmickrisp/bridge/stubs/mtl_device.c new file mode 100644 index 00000000000..78e22e30c12 --- /dev/null +++ b/src/kosmickrisp/bridge/stubs/mtl_device.c @@ -0,0 +1,73 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_device.h" + +/* Device creation */ +mtl_device * +mtl_device_create(void) +{ + return NULL; +} + +/* Device operations */ +void +mtl_start_gpu_capture(mtl_device *mtl_dev_handle) +{ +} + +void +mtl_stop_gpu_capture(void) +{ +} + +/* Device feature query */ +void +mtl_device_get_name(mtl_device *dev, char buffer[256]) +{ +} + +void +mtl_device_get_architecture_name(mtl_device *dev, char buffer[256]) +{ +} + +uint64_t +mtl_device_get_peer_group_id(mtl_device *dev) +{ + return 0u; +} + +uint32_t +mtl_device_get_peer_index(mtl_device *dev) +{ + return 0u; +} + +uint64_t +mtl_device_get_registry_id(mtl_device *dev) +{ + return 0u; +} + +struct mtl_size +mtl_device_max_threads_per_threadgroup(mtl_device *dev) +{ + return (struct mtl_size){}; +} + +/* Resource queries */ +void +mtl_heap_buffer_size_and_align_with_length(mtl_device *device, uint64_t *size_B, + uint64_t *align_B) +{ +} + +void +mtl_heap_texture_size_and_align_with_descriptor(mtl_device *device, + struct kk_image_layout *layout) +{ +} diff --git a/src/kosmickrisp/bridge/stubs/mtl_encoder.c b/src/kosmickrisp/bridge/stubs/mtl_encoder.c new file mode 100644 index 00000000000..672e628c1dc --- /dev/null +++ b/src/kosmickrisp/bridge/stubs/mtl_encoder.c @@ -0,0 +1,273 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_encoder.h" + +/* Common encoder utils */ +void +mtl_end_encoding(void *encoder) +{ +} + +/* MTLBlitEncoder */ +mtl_blit_encoder * +mtl_new_blit_command_encoder(mtl_command_buffer *cmd_buffer) +{ + return NULL; +} + +void +mtl_blit_update_fence(mtl_blit_encoder *encoder, mtl_fence *fence) +{ +} + +void +mtl_blit_wait_for_fence(mtl_blit_encoder *encoder, mtl_fence *fence) +{ +} + +void +mtl_copy_from_buffer_to_buffer(mtl_blit_encoder *blit_enc_handle, + mtl_buffer *src_buf, size_t src_offset, + mtl_buffer *dst_buf, size_t dst_offset, + size_t size) +{ +} + +void +mtl_copy_from_buffer_to_texture(mtl_blit_encoder *blit_enc_handle, + struct mtl_buffer_image_copy *data) +{ +} + +void +mtl_copy_from_texture_to_buffer(mtl_blit_encoder *blit_enc_handle, + struct mtl_buffer_image_copy *data) +{ +} + +void +mtl_copy_from_texture_to_texture(mtl_blit_encoder *blit_enc_handle, + mtl_texture *src_tex_handle, size_t src_slice, + size_t src_level, struct mtl_origin src_origin, + struct mtl_size src_size, + mtl_texture *dst_tex_handle, size_t dst_slice, + size_t dst_level, struct mtl_origin dst_origin) +{ +} + +/* MTLComputeEncoder */ +mtl_compute_encoder * +mtl_new_compute_command_encoder(mtl_command_buffer *cmd_buffer) +{ + return NULL; +} + +void +mtl_compute_update_fence(mtl_compute_encoder *encoder, mtl_fence *fence) +{ +} + +void +mtl_compute_wait_for_fence(mtl_compute_encoder *encoder, mtl_fence *fence) +{ +} + +void +mtl_compute_set_pipeline_state(mtl_compute_encoder *encoder, + mtl_compute_pipeline_state *state_handle) +{ +} + +void +mtl_compute_set_buffer(mtl_compute_encoder *encoder, mtl_buffer *buffer, + size_t offset, size_t index) +{ +} + +void +mtl_compute_use_resource(mtl_compute_encoder *encoder, mtl_resource *res_handle, + uint32_t usage) +{ +} + +void +mtl_compute_use_resources(mtl_compute_encoder *encoder, + mtl_resource **resource_handles, uint32_t count, + enum mtl_resource_usage usage) +{ +} + +void +mtl_compute_use_heaps(mtl_compute_encoder *encoder, mtl_heap **heaps, + uint32_t count) +{ +} + +void +mtl_dispatch_threads(mtl_compute_encoder *encoder, struct mtl_size grid_size, + struct mtl_size local_size) +{ +} + +void +mtl_dispatch_threadgroups_with_indirect_buffer(mtl_compute_encoder *encoder, + mtl_buffer *buffer, + uint32_t offset, + struct mtl_size local_size) +{ +} + +/* MTLRenderEncoder */ +mtl_render_encoder * +mtl_new_render_command_encoder_with_descriptor( + mtl_command_buffer *command_buffer, mtl_render_pass_descriptor *descriptor) +{ + return NULL; +} + +void +mtl_render_update_fence(mtl_render_encoder *encoder, mtl_fence *fence) +{ +} + +void +mtl_render_wait_for_fence(mtl_render_encoder *encoder, mtl_fence *fence) +{ +} + +void +mtl_set_viewports(mtl_render_encoder *encoder, struct mtl_viewport *viewports, + uint32_t count) +{ +} + +void +mtl_set_scissor_rects(mtl_render_encoder *encoder, + struct mtl_scissor_rect *scissor_rects, uint32_t count) +{ +} + +void +mtl_render_set_pipeline_state(mtl_render_encoder *encoder, + mtl_render_pipeline_state *pipeline) +{ +} + +void +mtl_set_depth_stencil_state(mtl_render_encoder *encoder, + mtl_depth_stencil_state *state) +{ +} + +void +mtl_set_stencil_references(mtl_render_encoder *encoder, uint32_t front, + uint32_t back) +{ +} + +void +mtl_set_front_face_winding(mtl_render_encoder *encoder, + enum mtl_winding winding) +{ +} + +void +mtl_set_cull_mode(mtl_render_encoder *encoder, enum mtl_cull_mode mode) +{ +} + +void 
+mtl_set_visibility_result_mode(mtl_render_encoder *encoder, + enum mtl_visibility_result_mode mode, + size_t offset) +{ +} + +void +mtl_set_depth_bias(mtl_render_encoder *encoder, float depth_bias, + float slope_scale, float clamp) +{ +} + +void +mtl_set_depth_clip_mode(mtl_render_encoder *encoder, + enum mtl_depth_clip_mode mode) +{ +} + +void +mtl_set_vertex_amplification_count(mtl_render_encoder *encoder, + uint32_t *layer_ids, uint32_t id_count) +{ +} + +void +mtl_set_vertex_buffer(mtl_render_encoder *encoder, mtl_buffer *buffer, + uint32_t offset, uint32_t index) +{ +} + +void +mtl_set_fragment_buffer(mtl_render_encoder *encoder, mtl_buffer *buffer, + uint32_t offset, uint32_t index) +{ +} + +void +mtl_draw_primitives(mtl_render_encoder *encoder, + enum mtl_primitive_type primitve_type, uint32_t vertexStart, + uint32_t vertexCount, uint32_t instanceCount, + uint32_t baseInstance) +{ +} + +void +mtl_draw_indexed_primitives( + mtl_render_encoder *encoder, enum mtl_primitive_type primitve_type, + uint32_t index_count, enum mtl_index_type index_type, + mtl_buffer *index_buffer, uint32_t index_buffer_offset, + uint32_t instance_count, int32_t base_vertex, uint32_t base_instance) +{ +} + +void +mtl_draw_primitives_indirect(mtl_render_encoder *encoder, + enum mtl_primitive_type primitve_type, + mtl_buffer *indirect_buffer, + uint64_t indirect_buffer_offset) +{ +} + +void +mtl_draw_indexed_primitives_indirect(mtl_render_encoder *encoder, + enum mtl_primitive_type primitve_type, + enum mtl_index_type index_type, + mtl_buffer *index_buffer, + uint32_t index_buffer_offset, + mtl_buffer *indirect_buffer, + uint64_t indirect_buffer_offset) +{ +} + +void +mtl_render_use_resource(mtl_compute_encoder *encoder, mtl_resource *res_handle, + uint32_t usage) +{ +} + +void +mtl_render_use_resources(mtl_render_encoder *encoder, + mtl_resource **resource_handles, uint32_t count, + enum mtl_resource_usage usage) +{ +} + +void +mtl_render_use_heaps(mtl_render_encoder *encoder, mtl_heap **heaps, + uint32_t count) +{ +} diff --git a/src/kosmickrisp/bridge/stubs/mtl_heap.c b/src/kosmickrisp/bridge/stubs/mtl_heap.c new file mode 100644 index 00000000000..de916c4d76c --- /dev/null +++ b/src/kosmickrisp/bridge/stubs/mtl_heap.c @@ -0,0 +1,37 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_heap.h" + +/* Creation */ +mtl_heap * +mtl_new_heap(mtl_device *device, uint64_t size, + enum mtl_resource_options resource_options) +{ + return NULL; +} + +/* Utils */ +uint64_t +mtl_heap_get_size(mtl_heap *heap) +{ + return 0u; +} + +/* Allocation from heap */ +mtl_buffer * +mtl_new_buffer_with_length(mtl_heap *heap, uint64_t size_B, uint64_t offset_B) +{ + return NULL; +} + +mtl_texture * +mtl_new_texture_with_descriptor(mtl_heap *heap, + const struct kk_image_layout *layout, + uint64_t offset) +{ + return NULL; +} diff --git a/src/kosmickrisp/bridge/stubs/mtl_library.c b/src/kosmickrisp/bridge/stubs/mtl_library.c new file mode 100644 index 00000000000..dcd41781e8b --- /dev/null +++ b/src/kosmickrisp/bridge/stubs/mtl_library.c @@ -0,0 +1,19 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_library.h" + +mtl_library * +mtl_new_library(mtl_device *device, const char *src) +{ + return NULL; +} + +mtl_function * +mtl_new_function_with_name(mtl_library *lib, const char *entry_point) +{ + return NULL; +} diff --git a/src/kosmickrisp/bridge/stubs/mtl_render_state.c b/src/kosmickrisp/bridge/stubs/mtl_render_state.c new file mode 100644 index 00000000000..e971c5e460f --- /dev/null +++ b/src/kosmickrisp/bridge/stubs/mtl_render_state.c @@ -0,0 +1,288 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_render_state.h" + +#include "mtl_format.h" + +/* TODO_KOSMICKRISP Remove */ +#include "vulkan/vulkan.h" + +/* Render pass descriptor */ +mtl_render_pass_descriptor * +mtl_new_render_pass_descriptor(void) +{ + return NULL; +} + +mtl_render_pass_attachment_descriptor * +mtl_render_pass_descriptor_get_color_attachment( + mtl_render_pass_descriptor *descriptor, uint32_t index) +{ + return NULL; +} + +mtl_render_pass_attachment_descriptor * +mtl_render_pass_descriptor_get_depth_attachment( + mtl_render_pass_descriptor *descriptor) +{ + return NULL; +} + +mtl_render_pass_attachment_descriptor * +mtl_render_pass_descriptor_get_stencil_attachment( + mtl_render_pass_descriptor *descriptor) +{ + return NULL; +} + +void +mtl_render_pass_attachment_descriptor_set_texture( + mtl_render_pass_attachment_descriptor *descriptor, mtl_texture *texture) +{ +} + +void +mtl_render_pass_attachment_descriptor_set_level( + mtl_render_pass_attachment_descriptor *descriptor, uint32_t level) +{ +} + +void +mtl_render_pass_attachment_descriptor_set_slice( + mtl_render_pass_attachment_descriptor *descriptor, uint32_t slice) +{ +} + +void +mtl_render_pass_attachment_descriptor_set_load_action( + mtl_render_pass_attachment_descriptor *descriptor, + enum mtl_load_action action) +{ +} + +void +mtl_render_pass_attachment_descriptor_set_store_action( + mtl_render_pass_attachment_descriptor *descriptor, + enum mtl_store_action action) +{ +} + +void +mtl_render_pass_attachment_descriptor_set_clear_color( + mtl_render_pass_attachment_descriptor *descriptor, + struct mtl_clear_color clear_color) +{ +} + +void +mtl_render_pass_attachment_descriptor_set_clear_depth( + mtl_render_pass_attachment_descriptor *descriptor, double depth) +{ +} + +void +mtl_render_pass_attachment_descriptor_set_clear_stencil( + mtl_render_pass_attachment_descriptor *descriptor, uint32_t stencil) +{ +} + +void +mtl_render_pass_descriptor_set_render_target_array_length( + mtl_render_pass_descriptor *descriptor, uint32_t length) +{ +} + +void +mtl_render_pass_descriptor_set_render_target_width( + mtl_render_pass_descriptor *descriptor, uint32_t width) +{ +} + +void +mtl_render_pass_descriptor_set_render_target_height( + mtl_render_pass_descriptor *descriptor, uint32_t height) +{ +} + +void +mtl_render_pass_descriptor_set_default_raster_sample_count( + mtl_render_pass_descriptor *descriptor, uint32_t sample_count) +{ +} + +void +mtl_render_pass_descriptor_set_visibility_buffer( + mtl_render_pass_descriptor *descriptor, mtl_buffer *visibility_buffer) +{ +} + +/* Render pipeline descriptor */ +mtl_render_pipeline_descriptor * +mtl_new_render_pipeline_descriptor(void) +{ + return NULL; +} + +void +mtl_render_pipeline_descriptor_set_vertex_shader( + mtl_render_pass_descriptor *descriptor, mtl_function *shader) +{ +} + +void +mtl_render_pipeline_descriptor_set_fragment_shader( + mtl_render_pass_descriptor 
*descriptor, mtl_function *shader) +{ +} + +void +mtl_render_pipeline_descriptor_set_input_primitive_topology( + mtl_render_pass_descriptor *descriptor, + enum mtl_primitive_topology_class topology_class) +{ +} + +void +mtl_render_pipeline_descriptor_set_color_attachment_format( + mtl_render_pass_descriptor *descriptor, uint8_t index, + enum mtl_pixel_format format) +{ +} + +void +mtl_render_pipeline_descriptor_set_depth_attachment_format( + mtl_render_pass_descriptor *descriptor, enum mtl_pixel_format format) +{ +} + +void +mtl_render_pipeline_descriptor_set_stencil_attachment_format( + mtl_render_pass_descriptor *descriptor, enum mtl_pixel_format format) +{ +} + +void +mtl_render_pipeline_descriptor_set_raster_sample_count( + mtl_render_pass_descriptor *descriptor, uint32_t sample_count) +{ +} + +void +mtl_render_pipeline_descriptor_set_alpha_to_coverage( + mtl_render_pass_descriptor *descriptor, bool enabled) +{ +} + +void +mtl_render_pipeline_descriptor_set_alpha_to_one( + mtl_render_pass_descriptor *descriptor, bool enabled) +{ +} + +void +mtl_render_pipeline_descriptor_set_rasterization_enabled( + mtl_render_pass_descriptor *descriptor, bool enabled) +{ +} + +void +mtl_render_pipeline_descriptor_set_max_vertex_amplification_count( + mtl_render_pass_descriptor *descriptor, uint32_t count) +{ +} + +/* Render pipeline */ +mtl_render_pipeline_state * +mtl_new_render_pipeline(mtl_device *device, + mtl_render_pass_descriptor *descriptor) +{ + return NULL; +} + +/* Stencil descriptor */ +mtl_stencil_descriptor * +mtl_new_stencil_descriptor(void) +{ + return NULL; +} + +void +mtl_stencil_descriptor_set_stencil_failure_operation( + mtl_stencil_descriptor *descriptor, enum VkStencilOp op) +{ +} + +void +mtl_stencil_descriptor_set_depth_failure_operation( + mtl_stencil_descriptor *descriptor, enum VkStencilOp op) +{ +} + +void +mtl_stencil_descriptor_set_depth_stencil_pass_operation( + mtl_stencil_descriptor *descriptor, enum VkStencilOp op) +{ +} + +void +mtl_stencil_descriptor_set_stencil_compare_function( + mtl_stencil_descriptor *descriptor, enum VkCompareOp op) +{ +} + +void +mtl_stencil_descriptor_set_read_mask(mtl_stencil_descriptor *descriptor, + uint32_t mask) +{ +} + +void +mtl_stencil_descriptor_set_write_mask(mtl_stencil_descriptor *descriptor, + uint32_t mask) +{ +} + +/* Depth stencil descriptor */ +mtl_depth_stencil_descriptor * +mtl_new_depth_stencil_descriptor(void) +{ + return NULL; +} + +void +mtl_depth_stencil_descriptor_set_depth_compare_function( + mtl_depth_stencil_descriptor *descriptor, enum VkCompareOp op) +{ +} + +void +mtl_depth_stencil_descriptor_set_depth_write_enabled( + mtl_depth_stencil_descriptor *descriptor, bool enable_write) +{ +} + +void +mtl_depth_stencil_descriptor_set_back_face_stencil( + mtl_depth_stencil_descriptor *descriptor, + mtl_stencil_descriptor *stencil_descriptor) +{ +} + +void +mtl_depth_stencil_descriptor_set_front_face_stencil( + mtl_depth_stencil_descriptor *descriptor, + mtl_stencil_descriptor *stencil_descriptor) +{ +} + +/* Depth stencil state */ +mtl_depth_stencil_state * +mtl_new_depth_stencil_state(mtl_device *device, + mtl_depth_stencil_descriptor *descriptor) +{ + return NULL; +} diff --git a/src/kosmickrisp/bridge/stubs/mtl_sampler.c b/src/kosmickrisp/bridge/stubs/mtl_sampler.c new file mode 100644 index 00000000000..a3530b38440 --- /dev/null +++ b/src/kosmickrisp/bridge/stubs/mtl_sampler.c @@ -0,0 +1,74 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_sampler.h" + +/* Sampler descriptor */ +mtl_sampler_descriptor * +mtl_new_sampler_descriptor(void) +{ + return NULL; +} + +/* Sampler descriptor utils */ +void +mtl_sampler_descriptor_set_normalized_coordinates( + mtl_sampler_descriptor *descriptor, bool normalized_coordinates) +{ +} + +void +mtl_sampler_descriptor_set_address_mode( + mtl_sampler_descriptor *descriptor, + enum mtl_sampler_address_mode address_mode_u, + enum mtl_sampler_address_mode address_mode_v, + enum mtl_sampler_address_mode address_mode_w) +{ +} + +void +mtl_sampler_descriptor_set_border_color(mtl_sampler_descriptor *descriptor, + enum mtl_sampler_border_color color) +{ +} +void +mtl_sampler_descriptor_set_filters(mtl_sampler_descriptor *descriptor, + enum mtl_sampler_min_mag_filter min_filter, + enum mtl_sampler_min_mag_filter mag_filter, + enum mtl_sampler_mip_filter mip_filter) +{ +} + +void +mtl_sampler_descriptor_set_lod_clamp(mtl_sampler_descriptor *descriptor, + float min, float max) +{ +} + +void +mtl_sampler_descriptor_set_max_anisotropy(mtl_sampler_descriptor *descriptor, + uint64_t max) +{ +} +void +mtl_sampler_descriptor_set_compare_function(mtl_sampler_descriptor *descriptor, + enum mtl_compare_function function) +{ +} + +/* Sampler */ +mtl_sampler * +mtl_new_sampler(mtl_device *device, mtl_sampler_descriptor *descriptor) +{ + return NULL; +} + +/* Sampler utils */ +uint64_t +mtl_sampler_get_gpu_resource_id(mtl_sampler *sampler) +{ + return 0u; +} diff --git a/src/kosmickrisp/bridge/stubs/mtl_sync.c b/src/kosmickrisp/bridge/stubs/mtl_sync.c new file mode 100644 index 00000000000..0d4e1a8512e --- /dev/null +++ b/src/kosmickrisp/bridge/stubs/mtl_sync.c @@ -0,0 +1,47 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_sync.h" + +/* MTLFence */ +mtl_fence * +mtl_new_fence(mtl_device *device) +{ + return NULL; +} + +/* MTLEvent */ +mtl_event * +mtl_new_event(mtl_device *device) +{ + return NULL; +} + +/* MTLSharedEvent */ +mtl_shared_event * +mtl_new_shared_event(mtl_device *device) +{ + return NULL; +} + +int +mtl_shared_event_wait_until_signaled_value(mtl_shared_event *event_handle, + uint64_t value, uint64_t timeout_ms) +{ + return 0; +} + +uint64_t +mtl_shared_event_get_signaled_value(mtl_shared_event *event_handle) +{ + return 0u; +} + +void +mtl_shared_event_set_signaled_value(mtl_shared_event *event_handle, + uint64_t value) +{ +} diff --git a/src/kosmickrisp/bridge/stubs/mtl_texture.c b/src/kosmickrisp/bridge/stubs/mtl_texture.c new file mode 100644 index 00000000000..8ed3b22ec95 --- /dev/null +++ b/src/kosmickrisp/bridge/stubs/mtl_texture.c @@ -0,0 +1,29 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "mtl_texture.h" + +/* Utils*/ +uint64_t +mtl_texture_get_gpu_resource_id(mtl_texture *texture) +{ + return 0u; +} + +/* Texture view creation */ +mtl_texture * +mtl_new_texture_view_with(mtl_texture *texture, + const struct kk_view_layout *layout) +{ + return NULL; +} + +mtl_texture * +mtl_new_texture_view_with_no_swizzle(mtl_texture *texture, + const struct kk_view_layout *layout) +{ + return NULL; +} diff --git a/src/kosmickrisp/bridge/vk_to_mtl_map.c b/src/kosmickrisp/bridge/vk_to_mtl_map.c new file mode 100644 index 00000000000..ef586dc09a0 --- /dev/null +++ b/src/kosmickrisp/bridge/vk_to_mtl_map.c @@ -0,0 +1,251 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "vk_to_mtl_map.h" + +#include "kosmickrisp/bridge/mtl_types.h" + +#include "util/format/u_format.h" + +#include "vulkan/vulkan.h" +#include "vk_meta.h" + +struct mtl_origin +vk_offset_3d_to_mtl_origin(const struct VkOffset3D *offset) +{ + struct mtl_origin ret = { + .x = offset->x, + .y = offset->y, + .z = offset->z, + }; + return ret; +} + +struct mtl_size +vk_extent_3d_to_mtl_size(const struct VkExtent3D *extent) +{ + struct mtl_size ret = { + .x = extent->width, + .y = extent->height, + .z = extent->depth, + }; + return ret; +} + +enum mtl_primitive_type +vk_primitive_topology_to_mtl_primitive_type(enum VkPrimitiveTopology topology) +{ + switch (topology) { + case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: + return MTL_PRIMITIVE_TYPE_POINT; + case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: + return MTL_PRIMITIVE_TYPE_LINE; + case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: + return MTL_PRIMITIVE_TYPE_LINE_STRIP; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch" + case VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA: +#pragma GCC diagnostic pop + /* Triangle fans are emulated meaning we'll translate the index buffer to + * triangle list or generate a index buffer if there's none */ + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: + return MTL_PRIMITIVE_TYPE_TRIANGLE; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: + return MTL_PRIMITIVE_TYPE_TRIANGLE_STRIP; + default: + assert(0 && "Primitive topology not supported!"); + return 0; + } +} + +enum mtl_primitive_topology_class +vk_primitive_topology_to_mtl_primitive_topology_class( + enum VkPrimitiveTopology topology) +{ + switch (topology) { + case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: + return MTL_PRIMITIVE_TOPOLOGY_CLASS_POINT; + case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: + case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: + return MTL_PRIMITIVE_TOPOLOGY_CLASS_LINE; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch" + case VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA: +#pragma GCC diagnostic pop + return MTL_PRIMITIVE_TOPOLOGY_CLASS_TRIANGLE; + default: + return MTL_PRIMITIVE_TOPOLOGY_CLASS_UNSPECIFIED; + } +} + +enum mtl_load_action +vk_attachment_load_op_to_mtl_load_action(enum VkAttachmentLoadOp op) +{ + switch (op) { + case VK_ATTACHMENT_LOAD_OP_LOAD: + return MTL_LOAD_ACTION_LOAD; + case VK_ATTACHMENT_LOAD_OP_CLEAR: + return MTL_LOAD_ACTION_CLEAR; + case VK_ATTACHMENT_LOAD_OP_DONT_CARE: + return MTL_LOAD_ACTION_DONT_CARE; + default: + assert(false && "Unsupported VkAttachmentLoadOp"); + return MTL_LOAD_ACTION_DONT_CARE; + }; +} + +enum mtl_store_action +vk_attachment_store_op_to_mtl_store_action(enum VkAttachmentStoreOp op) +{ + switch (op) { + case VK_ATTACHMENT_STORE_OP_STORE: + return MTL_STORE_ACTION_STORE; + case VK_ATTACHMENT_STORE_OP_DONT_CARE: + return MTL_STORE_ACTION_DONT_CARE; + case VK_ATTACHMENT_STORE_OP_NONE: + return MTL_STORE_ACTION_UNKNOWN; + default: + assert(false && "Unsupported VkAttachmentStoreOp"); + return MTL_STORE_ACTION_UNKNOWN; + }; +} + +enum mtl_sampler_address_mode +vk_sampler_address_mode_to_mtl_sampler_address_mode( + enum VkSamplerAddressMode mode) +{ + switch (mode) { + case VK_SAMPLER_ADDRESS_MODE_REPEAT: + return MTL_SAMPLER_ADDRESS_MODE_REPEAT; + case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: + return MTL_SAMPLER_ADDRESS_MODE_MIRROR_REPEAT; + case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: + return 
MTL_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
+   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
+      return MTL_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER_COLOR;
+   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
+      return MTL_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
+   default:
+      UNREACHABLE("Unsupported address mode");
+   }
+}
+
+enum mtl_sampler_border_color
+vk_border_color_to_mtl_sampler_border_color(enum VkBorderColor color)
+{
+   switch (color) {
+   case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
+   case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
+      return MTL_SAMPLER_BORDER_COLOR_TRANSPARENT_BLACK;
+   case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
+   case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
+      return MTL_SAMPLER_BORDER_COLOR_OPAQUE_BLACK;
+   case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
+   case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
+      return MTL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE;
+   case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
+   case VK_BORDER_COLOR_INT_CUSTOM_EXT:
+      return MTL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE;
+   default:
+      UNREACHABLE("Unsupported border color");
+   }
+}
+
+enum mtl_sampler_min_mag_filter
+vk_filter_to_mtl_sampler_min_mag_filter(enum VkFilter filter)
+{
+   switch (filter) {
+   case VK_FILTER_NEAREST:
+      return MTL_SAMPLER_MIN_MAG_FILTER_NEAREST;
+   case VK_FILTER_LINEAR:
+      return MTL_SAMPLER_MIN_MAG_FILTER_LINEAR;
+   default:
+      UNREACHABLE("Unsupported filter");
+   }
+}
+
+enum mtl_sampler_mip_filter
+vk_sampler_mipmap_mode_to_mtl_sampler_mip_filter(enum VkSamplerMipmapMode mode)
+{
+   switch (mode) {
+   case VK_SAMPLER_MIPMAP_MODE_NEAREST:
+      return MTL_SAMPLER_MIP_FILTER_NEAREST;
+   case VK_SAMPLER_MIPMAP_MODE_LINEAR:
+      return MTL_SAMPLER_MIP_FILTER_LINEAR;
+   default:
+      UNREACHABLE("Unsupported mipmap mode");
+   }
+}
+
+enum mtl_compare_function
+vk_compare_op_to_mtl_compare_function(enum VkCompareOp op)
+{
+   switch (op) {
+   case VK_COMPARE_OP_NEVER:
+      return MTL_COMPARE_FUNCTION_NEVER;
+   case VK_COMPARE_OP_LESS:
+      return MTL_COMPARE_FUNCTION_LESS;
+   case VK_COMPARE_OP_EQUAL:
+      return MTL_COMPARE_FUNCTION_EQUAL;
+   case VK_COMPARE_OP_LESS_OR_EQUAL:
+      return MTL_COMPARE_FUNCTION_LESS_EQUAL;
+   case VK_COMPARE_OP_GREATER:
+      return MTL_COMPARE_FUNCTION_GREATER;
+   case VK_COMPARE_OP_NOT_EQUAL:
+      return MTL_COMPARE_FUNCTION_NOT_EQUAL;
+   case VK_COMPARE_OP_GREATER_OR_EQUAL:
+      return MTL_COMPARE_FUNCTION_GREATER_EQUAL;
+   case VK_COMPARE_OP_ALWAYS:
+      return MTL_COMPARE_FUNCTION_ALWAYS;
+   default:
+      UNREACHABLE("Unsupported compare op");
+   }
+}
+
+enum mtl_winding
+vk_front_face_to_mtl_winding(enum VkFrontFace face)
+{
+   switch (face) {
+   case VK_FRONT_FACE_CLOCKWISE:
+      return MTL_WINDING_CLOCKWISE;
+   case VK_FRONT_FACE_COUNTER_CLOCKWISE:
+      return MTL_WINDING_COUNTER_CLOCKWISE;
+   default:
+      assert(false && "Unsupported VkFrontFace");
+      return MTL_WINDING_CLOCKWISE;
+   }
+}
+
+enum mtl_cull_mode
+vk_front_face_to_mtl_cull_mode(enum VkCullModeFlagBits mode)
+{
+   switch (mode) {
+   case VK_CULL_MODE_NONE:
+      return MTL_CULL_MODE_NONE;
+   case VK_CULL_MODE_FRONT_BIT:
+      return MTL_CULL_MODE_FRONT;
+   case VK_CULL_MODE_BACK_BIT:
+      return MTL_CULL_MODE_BACK;
+   default:
+      UNREACHABLE("Unsupported VkCullModeFlags");
+   }
+}
+
+enum mtl_index_type
+index_size_in_bytes_to_mtl_index_type(unsigned bytes)
+{
+   switch (bytes) {
+   case 2u:
+      return MTL_INDEX_TYPE_UINT16;
+   case 4u:
+      return MTL_INDEX_TYPE_UINT32;
+   default:
+      UNREACHABLE("Unsupported byte size for index");
+   }
+}
diff --git a/src/kosmickrisp/bridge/vk_to_mtl_map.h b/src/kosmickrisp/bridge/vk_to_mtl_map.h
new file mode 100644
index 00000000000..23e5506d9a6
--- /dev/null
+++ 
b/src/kosmickrisp/bridge/vk_to_mtl_map.h @@ -0,0 +1,81 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ +#ifndef KK_MTL_TO_VK_MAP_H +#define KK_MTL_TO_VK_MAP_H 1 + +enum pipe_format; + +struct mtl_origin; +struct mtl_size; +enum mtl_primitive_type; +enum mtl_primitive_topology_class; +enum mtl_load_action; +enum mtl_store_action; +enum mtl_sampler_address_mode; +enum mtl_sampler_border_color; +enum mtl_sampler_min_mag_filter; +enum mtl_sampler_mip_filter; +enum mtl_compare_function; +enum mtl_winding; +enum mtl_cull_mode; +enum mtl_index_type; + +struct VkOffset3D; +struct VkExtent3D; +union VkClearColorValue; +enum VkPrimitiveTopology; +enum VkAttachmentLoadOp; +enum VkAttachmentStoreOp; +enum VkSamplerAddressMode; +enum VkBorderColor; +enum VkFilter; +enum VkSamplerMipmapMode; +enum VkCompareOp; +enum VkFrontFace; +enum VkCullModeFlagBits; + +/* STRUCTS */ +struct mtl_origin vk_offset_3d_to_mtl_origin(const struct VkOffset3D *offset); + +struct mtl_size vk_extent_3d_to_mtl_size(const struct VkExtent3D *extent); + +/* ENUMS */ +enum mtl_primitive_type +vk_primitive_topology_to_mtl_primitive_type(enum VkPrimitiveTopology topology); + +enum mtl_primitive_topology_class +vk_primitive_topology_to_mtl_primitive_topology_class( + enum VkPrimitiveTopology topology); + +enum mtl_load_action +vk_attachment_load_op_to_mtl_load_action(enum VkAttachmentLoadOp op); + +enum mtl_store_action +vk_attachment_store_op_to_mtl_store_action(enum VkAttachmentStoreOp op); + +enum mtl_sampler_address_mode +vk_sampler_address_mode_to_mtl_sampler_address_mode( + enum VkSamplerAddressMode mode); + +enum mtl_sampler_border_color +vk_border_color_to_mtl_sampler_border_color(enum VkBorderColor color); + +enum mtl_sampler_min_mag_filter +vk_filter_to_mtl_sampler_min_mag_filter(enum VkFilter filter); + +enum mtl_sampler_mip_filter +vk_sampler_mipmap_mode_to_mtl_sampler_mip_filter(enum VkSamplerMipmapMode mode); + +enum mtl_compare_function +vk_compare_op_to_mtl_compare_function(enum VkCompareOp op); + +enum mtl_winding vk_front_face_to_mtl_winding(enum VkFrontFace face); + +enum mtl_cull_mode vk_front_face_to_mtl_cull_mode(enum VkCullModeFlagBits mode); + +enum mtl_index_type index_size_in_bytes_to_mtl_index_type(unsigned bytes); + +#endif /* KK_MTL_TO_VK_MAP_H */ diff --git a/src/kosmickrisp/compiler/meson.build b/src/kosmickrisp/compiler/meson.build new file mode 100644 index 00000000000..34fe22245d2 --- /dev/null +++ b/src/kosmickrisp/compiler/meson.build @@ -0,0 +1,35 @@ +# Copyright 2025 LunarG, Inc. 
+# Copyright 2025 Google LLC +# SPDX-License-Identifier: MIT + +libmsl_compiler_files = files( + 'nir_to_msl.c', + 'msl_type_inference.c', + 'msl_iomap.c', + 'msl_nir_lower_common.c', + 'msl_nir_lower_subgroups.c', +) + +msl_nir_algebraic_c = custom_target( + input : 'msl_nir_algebraic.py', + output : 'msl_nir_algebraic.c', + command : [ + prog_python, '@INPUT@', '-p', dir_compiler_nir, + ], + capture : true, + depend_files : nir_algebraic_depends, +) + + +libmsl_compiler = static_library( + 'msl_compiler', + [libmsl_compiler_files, msl_nir_algebraic_c], + dependencies : [idep_nir, idep_mesautil], + gnu_symbol_visibility: 'hidden', + build_by_default: false, +) + +idep_msl_to_nir = declare_dependency( + link_with : libmsl_compiler, +) + diff --git a/src/kosmickrisp/compiler/msl_iomap.c b/src/kosmickrisp/compiler/msl_iomap.c new file mode 100644 index 00000000000..aed3f012d29 --- /dev/null +++ b/src/kosmickrisp/compiler/msl_iomap.c @@ -0,0 +1,447 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +/* This file primarily concerns itself with mapping from the NIR (and Vulkan) + * model of I/O to the Metal one. */ + +#include "msl_private.h" + +#include "nir_builder.h" + +/* Mapping from alu type to Metal scalar type */ +static const char * +alu_type_to_string(nir_alu_type type) +{ + switch (type) { + case nir_type_uint8: + return "uchar"; + case nir_type_uint16: + return "ushort"; + case nir_type_uint32: + return "uint"; + case nir_type_uint64: + return "ulong"; + case nir_type_int8: + return "char"; + case nir_type_int16: + return "short"; + case nir_type_int32: + return "int"; + case nir_type_int64: + return "long"; + case nir_type_float16: + return "half"; + case nir_type_float32: + return "float"; + case nir_type_bool8: + return "bool"; + default: + UNREACHABLE("Unsupported nir_alu_type"); + } +}; + +/* Type suffix for a vector of a given size. 
*/ +static const char *vector_suffixes[] = { + [1] = "", + [2] = "2", + [3] = "3", + [4] = "4", +}; + +/* The type names of the generated output structs */ +static const char *VERTEX_OUTPUT_TYPE = "VertexOut"; +static const char *FRAGMENT_OUTPUT_TYPE = "FragmentOut"; + +/* Mapping from NIR's varying slots to the generated struct member name */ +static const char *VARYING_SLOT_NAME[NUM_TOTAL_VARYING_SLOTS] = { + [VARYING_SLOT_POS] = "position", + [VARYING_SLOT_PSIZ] = "point_size", + [VARYING_SLOT_PRIMITIVE_ID] = "primitive_id", + [VARYING_SLOT_LAYER] = "layer", + [VARYING_SLOT_VAR0] = "vary_00", + [VARYING_SLOT_VAR1] = "vary_01", + [VARYING_SLOT_VAR2] = "vary_02", + [VARYING_SLOT_VAR3] = "vary_03", + [VARYING_SLOT_VAR4] = "vary_04", + [VARYING_SLOT_VAR5] = "vary_05", + [VARYING_SLOT_VAR6] = "vary_06", + [VARYING_SLOT_VAR7] = "vary_07", + [VARYING_SLOT_VAR8] = "vary_08", + [VARYING_SLOT_VAR9] = "vary_09", + [VARYING_SLOT_VAR10] = "vary_10", + [VARYING_SLOT_VAR11] = "vary_11", + [VARYING_SLOT_VAR12] = "vary_12", + [VARYING_SLOT_VAR13] = "vary_13", + [VARYING_SLOT_VAR14] = "vary_14", + [VARYING_SLOT_VAR15] = "vary_15", + [VARYING_SLOT_VAR16] = "vary_16", + [VARYING_SLOT_VAR17] = "vary_17", + [VARYING_SLOT_VAR18] = "vary_18", + [VARYING_SLOT_VAR19] = "vary_19", + [VARYING_SLOT_VAR20] = "vary_20", + [VARYING_SLOT_VAR21] = "vary_21", + [VARYING_SLOT_VAR22] = "vary_22", + [VARYING_SLOT_VAR23] = "vary_23", + [VARYING_SLOT_VAR24] = "vary_24", + [VARYING_SLOT_VAR25] = "vary_25", + [VARYING_SLOT_VAR26] = "vary_26", + [VARYING_SLOT_VAR27] = "vary_27", + [VARYING_SLOT_VAR28] = "vary_28", + [VARYING_SLOT_VAR29] = "vary_29", + [VARYING_SLOT_VAR30] = "vary_30", + [VARYING_SLOT_VAR31] = "vary_31", +}; + +/* Mapping from NIR varying slot to the MSL struct member attribute. 
*/ +static const char *VARYING_SLOT_SEMANTIC[NUM_TOTAL_VARYING_SLOTS] = { + [VARYING_SLOT_POS] = "[[position]]", + [VARYING_SLOT_PSIZ] = "[[point_size]]", + [VARYING_SLOT_PRIMITIVE_ID] = "[[primitive_id]]", + [VARYING_SLOT_LAYER] = "[[render_target_array_index]]", + [VARYING_SLOT_VAR0] = "[[user(vary_00)]]", + [VARYING_SLOT_VAR1] = "[[user(vary_01)]]", + [VARYING_SLOT_VAR2] = "[[user(vary_02)]]", + [VARYING_SLOT_VAR3] = "[[user(vary_03)]]", + [VARYING_SLOT_VAR4] = "[[user(vary_04)]]", + [VARYING_SLOT_VAR5] = "[[user(vary_05)]]", + [VARYING_SLOT_VAR6] = "[[user(vary_06)]]", + [VARYING_SLOT_VAR7] = "[[user(vary_07)]]", + [VARYING_SLOT_VAR8] = "[[user(vary_08)]]", + [VARYING_SLOT_VAR9] = "[[user(vary_09)]]", + [VARYING_SLOT_VAR10] = "[[user(vary_10)]]", + [VARYING_SLOT_VAR11] = "[[user(vary_11)]]", + [VARYING_SLOT_VAR12] = "[[user(vary_12)]]", + [VARYING_SLOT_VAR13] = "[[user(vary_13)]]", + [VARYING_SLOT_VAR14] = "[[user(vary_14)]]", + [VARYING_SLOT_VAR15] = "[[user(vary_15)]]", + [VARYING_SLOT_VAR16] = "[[user(vary_16)]]", + [VARYING_SLOT_VAR17] = "[[user(vary_17)]]", + [VARYING_SLOT_VAR18] = "[[user(vary_18)]]", + [VARYING_SLOT_VAR19] = "[[user(vary_19)]]", + [VARYING_SLOT_VAR20] = "[[user(vary_20)]]", + [VARYING_SLOT_VAR21] = "[[user(vary_21)]]", + [VARYING_SLOT_VAR22] = "[[user(vary_22)]]", + [VARYING_SLOT_VAR23] = "[[user(vary_23)]]", + [VARYING_SLOT_VAR24] = "[[user(vary_24)]]", + [VARYING_SLOT_VAR25] = "[[user(vary_25)]]", + [VARYING_SLOT_VAR26] = "[[user(vary_26)]]", + [VARYING_SLOT_VAR27] = "[[user(vary_27)]]", + [VARYING_SLOT_VAR28] = "[[user(vary_28)]]", + [VARYING_SLOT_VAR29] = "[[user(vary_29)]]", + [VARYING_SLOT_VAR30] = "[[user(vary_30)]]", + [VARYING_SLOT_VAR31] = "[[user(vary_31)]]", +}; + +/* Mapping from NIR fragment output slot to MSL struct member name */ +static const char *FS_OUTPUT_NAME[] = { + [FRAG_RESULT_DEPTH] = "depth_out", + [FRAG_RESULT_STENCIL] = "stencil_out", + [FRAG_RESULT_SAMPLE_MASK] = "sample_mask_out", + [FRAG_RESULT_DATA0] = "color_0", + [FRAG_RESULT_DATA1] = "color_1", + [FRAG_RESULT_DATA2] = "color_2", + [FRAG_RESULT_DATA3] = "color_3", + [FRAG_RESULT_DATA4] = "color_4", + [FRAG_RESULT_DATA5] = "color_5", + [FRAG_RESULT_DATA6] = "color_6", + [FRAG_RESULT_DATA7] = "color_7", +}; + +/* Mapping from NIR fragment output slot to MSL struct member attribute */ +static const char *FS_OUTPUT_SEMANTIC[] = { + [FRAG_RESULT_DEPTH] = "", // special case, depends on depth layout + [FRAG_RESULT_STENCIL] = "stencil", [FRAG_RESULT_SAMPLE_MASK] = "sample_mask", + [FRAG_RESULT_DATA0] = "color(0)", [FRAG_RESULT_DATA1] = "color(1)", + [FRAG_RESULT_DATA2] = "color(2)", [FRAG_RESULT_DATA3] = "color(3)", + [FRAG_RESULT_DATA4] = "color(4)", [FRAG_RESULT_DATA5] = "color(5)", + [FRAG_RESULT_DATA6] = "color(6)", [FRAG_RESULT_DATA7] = "color(7)", +}; + +const char *depth_layout_arg[8] = { + [FRAG_DEPTH_LAYOUT_ANY] = "any", + [FRAG_DEPTH_LAYOUT_GREATER] = "greater", + [FRAG_DEPTH_LAYOUT_LESS] = "less", + [FRAG_DEPTH_LAYOUT_UNCHANGED] = "any", +}; + +/* Generate the struct definition for the vertex shader return value */ +static void +vs_output_block(nir_shader *shader, struct nir_to_msl_ctx *ctx) +{ + P(ctx, "struct %s {\n", VERTEX_OUTPUT_TYPE); + ctx->indentlevel++; + u_foreach_bit64(location, shader->info.outputs_written) { + struct io_slot_info info = ctx->outputs_info[location]; + const char *type = alu_type_to_string(info.type); + const char *vector_suffix = vector_suffixes[info.num_components]; + P_IND(ctx, "%s%s %s %s;\n", type, vector_suffix, + 
VARYING_SLOT_NAME[location], VARYING_SLOT_SEMANTIC[location]); + } + + ctx->indentlevel--; + P(ctx, "};\n"); +} + +/* Generate the struct definition for the fragment shader input argument */ +static void +fs_input_block(nir_shader *shader, struct nir_to_msl_ctx *ctx) +{ + P(ctx, "struct FragmentIn {\n"); + ctx->indentlevel++; + u_foreach_bit64(location, shader->info.inputs_read) { + struct io_slot_info info = ctx->inputs_info[location]; + const char *type = alu_type_to_string(info.type); + const char *vector_suffix = vector_suffixes[info.num_components]; + const char *interp = ""; + switch (info.interpolation) { + case INTERP_MODE_NOPERSPECTIVE: + if (info.centroid) + interp = "[[centroid_no_perspective]]"; + else if (info.sample) + interp = "[[sample_no_perspective]]"; + else + interp = "[[center_no_perspective]]"; + break; + case INTERP_MODE_FLAT: + interp = "[[flat]]"; + break; + default: + if (info.centroid) + interp = "[[centroid_perspective]]"; + else if (info.sample) + interp = "[[sample_perspective]]"; + break; + } + P_IND(ctx, "%s%s %s %s %s;\n", type, vector_suffix, + VARYING_SLOT_NAME[location], VARYING_SLOT_SEMANTIC[location], + interp); + } + + /* Enable reading from framebuffer */ + u_foreach_bit64(location, shader->info.outputs_read) { + struct io_slot_info info = ctx->outputs_info[location]; + const char *type = alu_type_to_string(info.type); + const char *vector_suffix = vector_suffixes[info.num_components]; + P_IND(ctx, "%s%s ", type, vector_suffix); + P(ctx, "%s [[%s, raster_order_group(0)]];\n", FS_OUTPUT_NAME[location], + FS_OUTPUT_SEMANTIC[location]); + } + + ctx->indentlevel--; + P(ctx, "};\n"); +} + +/* Generate the struct definition for the fragment shader return value */ +static void +fs_output_block(nir_shader *shader, struct nir_to_msl_ctx *ctx) +{ + P_IND(ctx, "struct %s {\n", FRAGMENT_OUTPUT_TYPE); + ctx->indentlevel++; + u_foreach_bit64(location, shader->info.outputs_written) { + struct io_slot_info info = ctx->outputs_info[location]; + const char *type = alu_type_to_string(info.type); + const char *vector_suffix = vector_suffixes[info.num_components]; + P_IND(ctx, "%s%s ", type, vector_suffix); + if (location == FRAG_RESULT_DEPTH) { + enum gl_frag_depth_layout depth_layout = shader->info.fs.depth_layout; + assert(depth_layout_arg[depth_layout]); + P(ctx, "%s [[depth(%s)]];\n", FS_OUTPUT_NAME[location], + depth_layout_arg[depth_layout]); + } else { + P(ctx, "%s [[%s]];\n", FS_OUTPUT_NAME[location], + FS_OUTPUT_SEMANTIC[location]); + } + } + ctx->indentlevel--; + P_IND(ctx, "};\n") +} + +struct gather_ctx { + struct io_slot_info *input; + struct io_slot_info *output; +}; + +static bool +msl_nir_gather_io_info(nir_builder *b, nir_intrinsic_instr *intrin, void *data) +{ + struct gather_ctx *ctx = (struct gather_ctx *)data; + switch (intrin->intrinsic) { + case nir_intrinsic_load_interpolated_input: { + unsigned component = nir_intrinsic_component(intrin); + struct nir_io_semantics io = nir_intrinsic_io_semantics(intrin); + assert(io.num_slots == 1u && "We don't support arrays"); + + unsigned location = nir_src_as_uint(intrin->src[1u]) + io.location; + ctx->input[location].type = nir_intrinsic_dest_type(intrin); + ctx->input[location].num_components = + MAX2(ctx->input[location].num_components, + intrin->num_components + component); + assert(ctx->input[location].num_components <= 4u && + "Cannot have more than a vec4"); + + nir_intrinsic_instr *interp_intrin = + nir_src_as_intrinsic(intrin->src[0u]); + ctx->input[location].interpolation = + 
nir_intrinsic_interp_mode(interp_intrin); + ctx->input[location].centroid = + interp_intrin->intrinsic == nir_intrinsic_load_barycentric_centroid; + ctx->input[location].sample = + interp_intrin->intrinsic == nir_intrinsic_load_barycentric_sample; + break; + } + case nir_intrinsic_load_input: { + unsigned component = nir_intrinsic_component(intrin); + struct nir_io_semantics io = nir_intrinsic_io_semantics(intrin); + assert(io.num_slots == 1u && "We don't support arrays"); + + unsigned location = nir_src_as_uint(intrin->src[0u]) + io.location; + ctx->input[location].type = nir_intrinsic_dest_type(intrin); + ctx->input[location].interpolation = INTERP_MODE_FLAT; + ctx->input[location].num_components = + MAX2(ctx->input[location].num_components, + intrin->num_components + component); + assert(ctx->input[location].num_components <= 4u && + "Cannot have more than a vec4"); + break; + } + case nir_intrinsic_load_output: { + unsigned component = nir_intrinsic_component(intrin); + struct nir_io_semantics io = nir_intrinsic_io_semantics(intrin); + assert(io.num_slots == 1u && "We don't support arrays"); + + unsigned location = nir_src_as_uint(intrin->src[0u]) + io.location; + ctx->output[location].type = nir_intrinsic_dest_type(intrin); + ctx->output[location].num_components = + MAX2(ctx->output[location].num_components, + intrin->num_components + component); + assert(ctx->output[location].num_components <= 4u && + "Cannot have more than a vec4"); + break; + } + case nir_intrinsic_store_output: { + unsigned component = nir_intrinsic_component(intrin); + unsigned write_mask = nir_intrinsic_write_mask(intrin); + struct nir_io_semantics io = nir_intrinsic_io_semantics(intrin); + assert(io.num_slots == 1u && "We don't support arrays"); + + /* Due to nir_lower_blend that doesn't generate intrinsics with the same + * num_components as destination, we need to compute current store's + * num_components using offset and mask. */ + unsigned num_components = component + 1u; + unsigned mask_left_most_index = 0u; + for (unsigned i = 0u; i < intrin->num_components; ++i) { + if ((write_mask >> i) & 1u) + mask_left_most_index = i; + } + num_components += mask_left_most_index; + unsigned location = nir_src_as_uint(intrin->src[1u]) + io.location; + ctx->output[location].type = nir_intrinsic_src_type(intrin); + ctx->output[location].num_components = + MAX3(ctx->output[location].num_components, num_components, + intrin->num_components); + assert(ctx->output[location].num_components <= 4u && + "Cannot have more than a vec4"); + break; + } + default: + break; + } + + return false; +} + +void +msl_gather_io_info(struct nir_to_msl_ctx *ctx, + struct io_slot_info *info_array_input, + struct io_slot_info *info_array_output) +{ + struct gather_ctx gather_ctx = { + .input = info_array_input, + .output = info_array_output, + }; + nir_shader_intrinsics_pass(ctx->shader, msl_nir_gather_io_info, + nir_metadata_all, &gather_ctx); +} + +/* Generate all the struct definitions needed for shader I/O */ +void +msl_emit_io_blocks(struct nir_to_msl_ctx *ctx, nir_shader *shader) +{ + switch (ctx->shader->info.stage) { + case MESA_SHADER_VERTEX: + vs_output_block(shader, ctx); + break; + case MESA_SHADER_FRAGMENT: + fs_input_block(shader, ctx); + fs_output_block(shader, ctx); + break; + case MESA_SHADER_COMPUTE: + break; + default: + assert(0); + } + // TODO_KOSMICKRISP This should not exist. 
We need to create input structs in + // nir that will later be translated + P(ctx, "struct Buffer {\n"); + ctx->indentlevel++; + P_IND(ctx, "uint64_t contents[1];\n"); // TODO_KOSMICKRISP This should not be + // a cpu pointer + ctx->indentlevel--; + P(ctx, "};\n") + + P(ctx, "struct SamplerTable {\n"); + ctx->indentlevel++; + P_IND(ctx, "sampler handles[1024];\n"); + ctx->indentlevel--; + P(ctx, "};\n") +} + +void +msl_emit_output_var(struct nir_to_msl_ctx *ctx, nir_shader *shader) +{ + switch (shader->info.stage) { + case MESA_SHADER_VERTEX: + P_IND(ctx, "%s out = {};\n", VERTEX_OUTPUT_TYPE); + break; + case MESA_SHADER_FRAGMENT: + P_IND(ctx, "%s out = {};\n", FRAGMENT_OUTPUT_TYPE); + + /* Load inputs to output */ + u_foreach_bit64(location, shader->info.outputs_read) { + P_IND(ctx, "out.%s = in.%s;\n", FS_OUTPUT_NAME[location], + FS_OUTPUT_NAME[location]); + } + break; + default: + break; + } +} + +const char * +msl_output_name(struct nir_to_msl_ctx *ctx, unsigned location) +{ + switch (ctx->shader->info.stage) { + case MESA_SHADER_VERTEX: + return VARYING_SLOT_NAME[location]; + case MESA_SHADER_FRAGMENT: + return FS_OUTPUT_NAME[location]; + default: + assert(0); + return ""; + } +} + +const char * +msl_input_name(struct nir_to_msl_ctx *ctx, unsigned location) +{ + switch (ctx->shader->info.stage) { + case MESA_SHADER_FRAGMENT: + return VARYING_SLOT_NAME[location]; + default: + assert(0); + return ""; + } +} diff --git a/src/kosmickrisp/compiler/msl_nir_algebraic.py b/src/kosmickrisp/compiler/msl_nir_algebraic.py new file mode 100644 index 00000000000..16d5fdaea9d --- /dev/null +++ b/src/kosmickrisp/compiler/msl_nir_algebraic.py @@ -0,0 +1,38 @@ +# Copyright 2025 LunarG, Inc. +# Copyright 2025 Google LLC +# Copyright 2022 Alyssa Rosenzweig +# Copyright 2021 Collabora, Ltd. +# Copyright 2016 Intel Corporation +# SPDX-License-Identifier: MIT + +import argparse +import sys +import math + +a = 'a' + +lower_pack = [ + # Based on the VIR lowering + (('f2f16_rtz', 'a@32'), + ('bcsel', ('flt', ('fabs', a), ('fabs', ('f2f32', ('f2f16_rtne', a)))), + ('isub', ('f2f16_rtne', a), 1), ('f2f16_rtne', a))), +] + + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-p', '--import-path', required=True) + args = parser.parse_args() + sys.path.insert(0, args.import_path) + run() + +def run(): + import nir_algebraic # pylint: disable=import-error + + print('#include "msl_private.h"') + + print(nir_algebraic.AlgebraicPass("msl_nir_lower_algebraic_late", lower_pack).render()) + +if __name__ == '__main__': + main() diff --git a/src/kosmickrisp/compiler/msl_nir_lower_common.c b/src/kosmickrisp/compiler/msl_nir_lower_common.c new file mode 100644 index 00000000000..34e0f264bcb --- /dev/null +++ b/src/kosmickrisp/compiler/msl_nir_lower_common.c @@ -0,0 +1,255 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ +#include "nir_to_msl.h" + +#include "nir.h" +#include "nir_builder.h" + +#include "util/format/u_format.h" + +bool +msl_nir_vs_remove_point_size_write(nir_builder *b, nir_intrinsic_instr *intrin, + void *data) +{ + if (intrin->intrinsic != nir_intrinsic_store_output) + return false; + + nir_io_semantics io = nir_intrinsic_io_semantics(intrin); + if (io.location == VARYING_SLOT_PSIZ) { + return nir_remove_sysval_output(intrin, MESA_SHADER_FRAGMENT); + } + + return false; +} + +bool +msl_nir_fs_remove_depth_write(nir_builder *b, nir_intrinsic_instr *intrin, + void *data) +{ + if (intrin->intrinsic != nir_intrinsic_store_output) + return false; + + nir_io_semantics io = nir_intrinsic_io_semantics(intrin); + if (io.location == FRAG_RESULT_DEPTH) { + return nir_remove_sysval_output(intrin, MESA_SHADER_FRAGMENT); + } + + return false; +} + +bool +msl_nir_fs_force_output_signedness( + nir_shader *nir, enum pipe_format render_target_formats[MAX_DRAW_BUFFERS]) +{ + assert(nir->info.stage == MESA_SHADER_FRAGMENT); + + bool update_derefs = false; + nir_foreach_variable_with_modes(var, nir, nir_var_shader_out) { + if (FRAG_RESULT_DATA0 <= var->data.location && + var->data.location <= FRAG_RESULT_DATA7 && + glsl_type_is_integer(var->type)) { + unsigned int slot = var->data.location - FRAG_RESULT_DATA0; + + if (glsl_type_is_uint_16_32_64(var->type) && + util_format_is_pure_sint(render_target_formats[slot])) { + var->type = glsl_ivec_type(var->type->vector_elements); + update_derefs = true; + } else if (glsl_type_is_int_16_32_64(var->type) && + util_format_is_pure_uint(render_target_formats[slot])) { + var->type = glsl_uvec_type(var->type->vector_elements); + update_derefs = true; + } + } + } + + if (update_derefs) { + nir_foreach_function_impl(impl, nir) { + nir_foreach_block(block, impl) { + nir_foreach_instr(instr, block) { + switch (instr->type) { + case nir_instr_type_deref: { + nir_deref_instr *deref = nir_instr_as_deref(instr); + if (deref->deref_type == nir_deref_type_var) { + deref->type = deref->var->type; + } + break; + } + default: + break; + } + } + } + nir_progress(update_derefs, impl, nir_metadata_control_flow); + } + } + + return update_derefs; +} + +bool +msl_lower_textures(nir_shader *nir) +{ + bool progress = false; + nir_lower_tex_options lower_tex_options = { + .lower_txp = ~0u, + .lower_sampler_lod_bias = true, + + /* We don't use 1D textures because they are really limited in Metal */ + .lower_1d = true, + + /* Metal does not support tg4 with individual offsets for each sample */ + .lower_tg4_offsets = true, + + /* Metal does not natively support offsets for texture.read operations */ + .lower_txf_offset = true, + .lower_txd_cube_map = true, + }; + + NIR_PASS(progress, nir, nir_lower_tex, &lower_tex_options); + return progress; +} + +static bool +replace_sample_id_for_sample_mask(nir_builder *b, nir_intrinsic_instr *intrin, + void *data) +{ + if (intrin->intrinsic != nir_intrinsic_load_sample_mask_in) + return false; + + nir_def_replace(nir_instr_def(&intrin->instr), (nir_def *)data); + return true; +} + +static bool +msl_replace_load_sample_mask_in_for_static_sample_mask( + nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + if (intr->intrinsic != nir_intrinsic_load_sample_mask_in) + return false; + + nir_def *sample_mask = (nir_def *)data; + nir_def_rewrite_uses(&intr->def, sample_mask); + return true; +} + +bool +msl_lower_static_sample_mask(nir_shader *nir, uint32_t sample_mask) +{ + /* Only support 
fragment for now */
+   assert(nir->info.stage == MESA_SHADER_FRAGMENT);
+
+   /* Embed sample mask */
+   nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);
+   nir_builder b = nir_builder_at(nir_before_impl(entrypoint));
+
+   struct nir_io_semantics io_semantics = {
+      .location = FRAG_RESULT_SAMPLE_MASK,
+      .num_slots = 1u,
+   };
+   nir_def *sample_mask_def = nir_imm_int(&b, sample_mask);
+   nir_store_output(&b, sample_mask_def, nir_imm_int(&b, 0u), .base = 0u,
+                    .range = 1u, .write_mask = 0x1, .component = 0u,
+                    .src_type = nir_type_uint32, .io_semantics = io_semantics);
+
+   return nir_shader_intrinsics_pass(
+      nir, msl_replace_load_sample_mask_in_for_static_sample_mask,
+      nir_metadata_control_flow, sample_mask_def);
+}
+
+bool
+msl_ensure_depth_write(nir_shader *nir)
+{
+   assert(nir->info.stage == MESA_SHADER_FRAGMENT);
+
+   bool has_depth_write =
+      nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH);
+   if (!has_depth_write) {
+      nir_variable *depth_var = nir_create_variable_with_location(
+         nir, nir_var_shader_out, FRAG_RESULT_DEPTH, glsl_float_type());
+
+      /* Write to depth at the very beginning */
+      nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);
+      nir_builder b = nir_builder_at(nir_before_impl(entrypoint));
+
+      nir_deref_instr *depth_deref = nir_build_deref_var(&b, depth_var);
+      nir_def *position = nir_load_frag_coord(&b);
+      nir_store_deref(&b, depth_deref, nir_channel(&b, position, 2u),
+                      0xFFFFFFFF);
+
+      nir->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_DEPTH);
+      nir->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
+      return nir_progress(true, entrypoint, nir_metadata_control_flow);
+   }
+   return false;
+}
+
+bool
+msl_ensure_vertex_position_output(nir_shader *nir)
+{
+   assert(nir->info.stage == MESA_SHADER_VERTEX);
+
+   bool has_position_write =
+      nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_POS);
+   if (!has_position_write) {
+      nir_variable *position_var = nir_create_variable_with_location(
+         nir, nir_var_shader_out, VARYING_SLOT_POS, glsl_vec4_type());
+
+      /* Write to position at the very beginning */
+      nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir);
+      nir_builder b = nir_builder_at(nir_before_impl(entrypoint));
+
+      nir_deref_instr *position_deref = nir_build_deref_var(&b, position_var);
+      nir_def *zero = nir_imm_float(&b, 0.0f);
+      nir_store_deref(&b, position_deref, nir_vec4(&b, zero, zero, zero, zero),
+                      0xFFFFFFFF);
+
+      nir->info.outputs_written |= BITFIELD64_BIT(VARYING_SLOT_POS);
+      return nir_progress(true, entrypoint, nir_metadata_control_flow);
+   }
+   return false;
+}
+
+static bool
+msl_sample_mask_uint(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+   if (intr->intrinsic == nir_intrinsic_store_output) {
+      struct nir_io_semantics io = nir_intrinsic_io_semantics(intr);
+      if (io.location == FRAG_RESULT_SAMPLE_MASK)
+         nir_intrinsic_set_src_type(intr, nir_type_uint32);
+   }
+
+   return false;
+}
+
+bool
+msl_nir_sample_mask_type(nir_shader *nir)
+{
+   assert(nir->info.stage == MESA_SHADER_FRAGMENT);
+   return nir_shader_intrinsics_pass(nir, msl_sample_mask_uint,
+                                     nir_metadata_all, NULL);
+}
+
+static bool
+msl_layer_id_uint(nir_builder *b, nir_intrinsic_instr *intr, void *data)
+{
+   if (intr->intrinsic == nir_intrinsic_store_output) {
+      struct nir_io_semantics io = nir_intrinsic_io_semantics(intr);
+      if (io.location == VARYING_SLOT_LAYER)
+         nir_intrinsic_set_src_type(intr, nir_type_uint32);
+   }
+
+   return false;
+}
+
+bool
+msl_nir_layer_id_type(nir_shader *nir)
+{
+   assert(nir->info.stage == 
MESA_SHADER_VERTEX); + return nir_shader_intrinsics_pass(nir, msl_layer_id_uint, nir_metadata_all, + NULL); +} diff --git a/src/kosmickrisp/compiler/msl_nir_lower_subgroups.c b/src/kosmickrisp/compiler/msl_nir_lower_subgroups.c new file mode 100644 index 00000000000..30201145c4a --- /dev/null +++ b/src/kosmickrisp/compiler/msl_nir_lower_subgroups.c @@ -0,0 +1,98 @@ +/* + * Copyright 2023 Valve Corporation + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "msl_private.h" +#include "nir.h" +#include "nir_builder.h" + +static bool +needs_bool_widening(nir_intrinsic_instr *intrin) +{ + switch (intrin->intrinsic) { + case nir_intrinsic_read_invocation: + case nir_intrinsic_read_first_invocation: + case nir_intrinsic_reduce: + case nir_intrinsic_quad_broadcast: + case nir_intrinsic_quad_swap_horizontal: + case nir_intrinsic_quad_swap_vertical: + case nir_intrinsic_quad_swap_diagonal: + case nir_intrinsic_shuffle: + case nir_intrinsic_shuffle_down: + case nir_intrinsic_shuffle_up: + case nir_intrinsic_shuffle_xor: + return true; + default: + return false; + } +} + +static bool +lower_bool_ops(nir_builder *b, nir_intrinsic_instr *intrin, void *_unused) +{ + if (!needs_bool_widening(intrin)) + return false; + + if (intrin->def.bit_size != 1) + return false; + + b->cursor = nir_before_instr(&intrin->instr); + nir_def *widen = nir_b2i32(b, intrin->src[0].ssa); + nir_src_rewrite(&intrin->src[0], widen); + intrin->def.bit_size = 32; + b->cursor = nir_after_instr(&intrin->instr); + nir_def *narrow = nir_b2b1(b, &intrin->def); + nir_def_rewrite_uses_after(&intrin->def, narrow); + + return true; +} + +static bool +lower(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + b->cursor = nir_before_instr(&intr->instr); + + switch (intr->intrinsic) { + case nir_intrinsic_vote_any: { + /* We don't have vote instructions, but we have efficient ballots */ + nir_def *ballot = nir_ballot(b, 1, 32, intr->src[0].ssa); + nir_def_rewrite_uses(&intr->def, nir_ine_imm(b, ballot, 0)); + return true; + } + + case nir_intrinsic_vote_all: { + nir_def *ballot = nir_ballot(b, 1, 32, nir_inot(b, intr->src[0].ssa)); + nir_def_rewrite_uses(&intr->def, nir_ieq_imm(b, ballot, 0)); + return true; + } + + default: + return false; + } +} + +void +msl_nir_lower_subgroups(nir_shader *nir) +{ + const nir_lower_subgroups_options subgroups_options = { + .subgroup_size = 32, + .ballot_bit_size = 32, + .ballot_components = 1, + .lower_subgroup_masks = true, + .lower_vote_ieq = true, + .lower_vote_feq = true, + .lower_vote_bool_eq = true, + .lower_inverse_ballot = true, + .lower_relative_shuffle = true, + .lower_quad = true, + .lower_reduce = true, + }; + NIR_PASS(_, nir, nir_lower_subgroups, &subgroups_options); + NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower, + nir_metadata_control_flow, NULL); + NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower_bool_ops, + nir_metadata_control_flow, NULL); +} diff --git a/src/kosmickrisp/compiler/msl_private.h b/src/kosmickrisp/compiler/msl_private.h new file mode 100644 index 00000000000..9ccd2bd7922 --- /dev/null +++ b/src/kosmickrisp/compiler/msl_private.h @@ -0,0 +1,77 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include "util/string_buffer.h" +#include "nir.h" + +struct io_slot_info { + nir_alu_type type; + uint32_t interpolation; + unsigned num_components; + bool centroid; + bool sample; +}; + +struct nir_to_msl_ctx { + FILE *output; + struct hash_table *types; + nir_shader *shader; + struct _mesa_string_buffer *text; + unsigned short indentlevel; + struct io_slot_info inputs_info[NUM_TOTAL_VARYING_SLOTS]; + struct io_slot_info outputs_info[NUM_TOTAL_VARYING_SLOTS]; +}; + +#define P_IND(ctx, ...) \ + do { \ + for (unsigned i = 0; i < (ctx)->indentlevel; i++) \ + _mesa_string_buffer_append((ctx)->text, " "); \ + _mesa_string_buffer_printf((ctx)->text, __VA_ARGS__); \ + } while (0); + +#define P(ctx, ...) _mesa_string_buffer_printf((ctx)->text, __VA_ARGS__); + +#define P_INDENT(ctx) \ + do { \ + for (unsigned i = 0; i < (ctx)->indentlevel; i++) \ + _mesa_string_buffer_append((ctx)->text, " "); \ + } while (0) + +/* Perform type inference. The returned value is a + * map from nir_def* to base type.*/ + +struct hash_table *msl_infer_types(nir_shader *shader); + +const char *msl_type_for_def(struct hash_table *types, nir_def *def); + +const char *msl_uint_type(uint8_t bit_size, uint8_t num_components); + +const char *msl_type_for_src(struct hash_table *types, nir_src *src); + +const char *msl_bitcast_for_src(struct hash_table *types, nir_src *src); + +void msl_src_as_const(struct nir_to_msl_ctx *ctx, nir_src *src); + +void msl_emit_io_blocks(struct nir_to_msl_ctx *ctx, nir_shader *shader); + +void msl_emit_output_var(struct nir_to_msl_ctx *ctx, nir_shader *shader); + +void msl_gather_io_info(struct nir_to_msl_ctx *ctx, + struct io_slot_info *info_array_input, + struct io_slot_info *info_array_output); + +const char *msl_input_name(struct nir_to_msl_ctx *ctx, unsigned location); + +const char *msl_output_name(struct nir_to_msl_ctx *ctx, unsigned location); + +bool msl_src_is_float(struct nir_to_msl_ctx *ctx, nir_src *src); +bool msl_def_is_sampler(struct nir_to_msl_ctx *ctx, nir_def *def); + +void msl_nir_lower_subgroups(nir_shader *nir); + +bool msl_nir_lower_algebraic_late(nir_shader *shader); diff --git a/src/kosmickrisp/compiler/msl_type_inference.c b/src/kosmickrisp/compiler/msl_type_inference.c new file mode 100644 index 00000000000..c10d90c3481 --- /dev/null +++ b/src/kosmickrisp/compiler/msl_type_inference.c @@ -0,0 +1,857 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "vulkan/vulkan_core.h" +#include "msl_private.h" + +typedef enum ti_type { + /* We haven't been able to assign a type yet */ + TYPE_NONE = 0, + /* All we know is that this is used in I/O, we + * can treat it as an opaque value (i.e. uint) */ + TYPE_GENERIC_DATA, + /* A generic int used in ALU operations but also can a bool for bitwise ops */ + TYPE_GENERIC_INT_OR_BOOL, + /* A generic int used in ALU operations that can be int or uint */ + TYPE_GENERIC_INT, + /* These are actual concrete types. 
*/ + TYPE_INT, + TYPE_UINT, + TYPE_BOOL, + TYPE_FLOAT, + TYPE_SAMPLER, +} ti_type; + +static ti_type +unify_types(ti_type t1, ti_type t2) +{ + ti_type generic = MIN2(t1, t2); + ti_type specific = MAX2(t1, t2); + if (t1 == t2) + return TYPE_NONE; + // NONE or GENERIC_DATA can be upgraded into any concrete type + if (generic == TYPE_GENERIC_DATA || generic == TYPE_NONE) + return specific; + if ((generic == TYPE_GENERIC_INT_OR_BOOL) && + ((specific == TYPE_INT) || (specific == TYPE_UINT) || + (specific == TYPE_BOOL))) + return specific; + if ((generic == TYPE_GENERIC_INT) && + ((specific == TYPE_INT) || (specific == TYPE_UINT))) + return specific; + return TYPE_NONE; +} + +static ti_type +ti_type_from_nir(nir_alu_type nir_type) +{ + switch (nir_alu_type_get_base_type(nir_type)) { + case nir_type_int: + return TYPE_INT; + case nir_type_uint: + return TYPE_UINT; + case nir_type_float: + return TYPE_FLOAT; + case nir_type_bool: + return TYPE_BOOL; + default: + assert(0); + return TYPE_NONE; + } +} + +static ti_type +ti_type_from_pipe_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_R16_FLOAT: + case PIPE_FORMAT_R32_FLOAT: + return TYPE_FLOAT; + case PIPE_FORMAT_R8_UINT: + case PIPE_FORMAT_R16_UINT: + case PIPE_FORMAT_R32_UINT: + case PIPE_FORMAT_R64_UINT: + return TYPE_UINT; + case PIPE_FORMAT_R8_SINT: + case PIPE_FORMAT_R16_SINT: + case PIPE_FORMAT_R32_SINT: + case PIPE_FORMAT_R64_SINT: + return TYPE_INT; + default: + assert(0); + return 0u; + } +} + +static void +set_type(struct hash_table *types, void *key, ti_type type) +{ + // convert nir_type + _mesa_hash_table_insert(types, key, (void *)type); +} + +static ti_type +get_type(struct hash_table *types, void *key) +{ + struct hash_entry *entry = _mesa_hash_table_search(types, key); + if (!entry) + return TYPE_NONE; + return (ti_type)(intptr_t)(entry->data); +} + +static bool +update_instr_type(struct hash_table *types, nir_instr *instr, ti_type type) +{ + if (instr->type == nir_instr_type_alu) { + nir_alu_instr *alu = nir_instr_as_alu(instr); + switch (alu->op) { + case nir_op_iadd: + case nir_op_isub: + case nir_op_ishl: + case nir_op_iand: + case nir_op_ior: + case nir_op_ixor: + set_type(types, &alu->def, type); + set_type(types, &alu->src[0].src, type); + set_type(types, &alu->src[1].src, type); + return true; + case nir_op_inot: + set_type(types, &alu->def, type); + set_type(types, &alu->src[0].src, type); + return true; + case nir_op_ieq: + case nir_op_ine: + set_type(types, &alu->src[0].src, type); + set_type(types, &alu->src[1].src, type); + return true; + case nir_op_bcsel: + set_type(types, &alu->def, type); + set_type(types, &alu->src[1].src, type); + set_type(types, &alu->src[2].src, type); + return true; + case nir_op_mov: + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + set_type(types, &alu->def, type); + for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) + set_type(types, &alu->src[i].src, type); + return true; + default: + return false; + } + } else if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + nir_intrinsic_info info = nir_intrinsic_infos[intr->intrinsic]; + switch (intr->intrinsic) { + case nir_intrinsic_load_reg: + set_type(types, &intr->def, type); + set_type(types, &intr->src[0], type); + return true; + case nir_intrinsic_store_reg: + set_type(types, &intr->src[0], type); + set_type(types, &intr->src[1], type); + return true; + case nir_intrinsic_decl_reg: + set_type(types, &intr->def, type); + return true; + case 
nir_intrinsic_load_global: + case nir_intrinsic_load_global_constant: + case nir_intrinsic_load_global_constant_bounded: + case nir_intrinsic_load_global_constant_offset: + case nir_intrinsic_load_push_constant: + set_type(types, &intr->def, type); + return true; + /* Scratch and shared are always UINT */ + case nir_intrinsic_load_scratch: + case nir_intrinsic_store_scratch: + case nir_intrinsic_load_shared: + case nir_intrinsic_store_shared: + return false; + case nir_intrinsic_store_global: + set_type(types, &intr->src[0], type); + return true; + case nir_intrinsic_read_first_invocation: + case nir_intrinsic_read_invocation: + case nir_intrinsic_quad_broadcast: + case nir_intrinsic_quad_swap_horizontal: + case nir_intrinsic_quad_swap_vertical: + case nir_intrinsic_quad_swap_diagonal: + case nir_intrinsic_shuffle: + case nir_intrinsic_shuffle_down: + case nir_intrinsic_shuffle_up: + case nir_intrinsic_shuffle_xor: + set_type(types, &intr->src[0], type); + set_type(types, &intr->def, type); + return true; + default: + if (info.has_dest && info.num_srcs == 0) { + set_type(types, &intr->def, type); + return true; + } + return false; + } + } else + return false; +} + +static void +infer_types_from_alu(struct hash_table *types, nir_alu_instr *alu) +{ + // for most types, we infer the type from the nir_op_info, + // but some ALU instructions are the same for int and uint. Those + // have their sources and defs get marked by TYPE_GENERIC_INT. + switch (alu->op) { + case nir_op_iadd: + case nir_op_isub: + case nir_op_ishl: + // (N, N) -> N + set_type(types, &alu->def, TYPE_GENERIC_INT); + set_type(types, &alu->src[0].src, TYPE_GENERIC_INT); + set_type(types, &alu->src[1].src, TYPE_GENERIC_INT); + break; + case nir_op_iand: + case nir_op_ior: + case nir_op_ixor: + set_type(types, &alu->def, TYPE_GENERIC_INT_OR_BOOL); + set_type(types, &alu->src[0].src, TYPE_GENERIC_INT_OR_BOOL); + set_type(types, &alu->src[1].src, TYPE_GENERIC_INT_OR_BOOL); + break; + case nir_op_inot: + // N -> N + set_type(types, &alu->def, TYPE_GENERIC_INT_OR_BOOL); + set_type(types, &alu->src[0].src, TYPE_GENERIC_INT_OR_BOOL); + break; + case nir_op_ieq: + case nir_op_ine: + // (N, N) -> bool + set_type(types, &alu->def, TYPE_BOOL); + set_type(types, &alu->src[0].src, TYPE_GENERIC_INT_OR_BOOL); + set_type(types, &alu->src[1].src, TYPE_GENERIC_INT_OR_BOOL); + break; + case nir_op_bcsel: + // (bool, T, T) -> T + set_type(types, &alu->def, TYPE_GENERIC_DATA); + set_type(types, &alu->src[0].src, TYPE_BOOL); + set_type(types, &alu->src[1].src, TYPE_GENERIC_DATA); + set_type(types, &alu->src[2].src, TYPE_GENERIC_DATA); + break; + // These don't provide any type information, we rely on type propagation + // to fill in the type data + case nir_op_mov: + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + break; + /* We don't have 32-bit width boolean, those are uints. 
*/ + case nir_op_b2b32: + set_type(types, &alu->def, TYPE_UINT); + set_type(types, &alu->src[0].src, TYPE_UINT); + break; + + default: { + // set type for def + const nir_op_info *info = &nir_op_infos[alu->op]; + set_type(types, &alu->def, ti_type_from_nir(info->output_type)); + for (int i = 0; i < info->num_inputs; i++) { + // set type for src + set_type(types, &alu->src[i].src, + ti_type_from_nir(info->input_types[i])); + } + } + } +} + +static void +infer_types_from_intrinsic(struct hash_table *types, nir_intrinsic_instr *instr) +{ + switch (instr->intrinsic) { + case nir_intrinsic_load_input: + case nir_intrinsic_load_interpolated_input: + case nir_intrinsic_load_output: { + ti_type ty = ti_type_from_nir(nir_intrinsic_dest_type(instr)); + set_type(types, &instr->def, ty); + break; + } + case nir_intrinsic_load_global_constant: + set_type(types, &instr->def, TYPE_GENERIC_DATA); + set_type(types, &instr->src[0], TYPE_UINT); + break; + case nir_intrinsic_load_global_constant_bounded: + set_type(types, &instr->def, TYPE_GENERIC_DATA); + set_type(types, &instr->src[0], TYPE_UINT); + set_type(types, &instr->src[1], TYPE_UINT); + set_type(types, &instr->src[2], TYPE_UINT); + break; + case nir_intrinsic_load_global_constant_offset: + set_type(types, &instr->def, TYPE_GENERIC_DATA); + set_type(types, &instr->src[0], TYPE_UINT); + set_type(types, &instr->src[1], TYPE_UINT); + break; + case nir_intrinsic_load_global: + case nir_intrinsic_load_push_constant: + set_type(types, &instr->def, TYPE_GENERIC_DATA); + set_type(types, &instr->src[0], TYPE_UINT); + break; + + case nir_intrinsic_global_atomic: + case nir_intrinsic_global_atomic_swap: + case nir_intrinsic_shared_atomic: + case nir_intrinsic_shared_atomic_swap: { + ti_type type = + ti_type_from_nir(nir_atomic_op_type(nir_intrinsic_atomic_op(instr))); + set_type(types, &instr->def, type); + set_type(types, &instr->src[0], TYPE_UINT); + set_type(types, &instr->src[1], type); + set_type(types, &instr->src[2], type); + break; + } + case nir_intrinsic_store_global: + set_type(types, &instr->src[0], TYPE_GENERIC_DATA); + set_type(types, &instr->src[1], TYPE_UINT); + break; + case nir_intrinsic_store_output: { + ti_type ty = ti_type_from_nir(nir_intrinsic_src_type(instr)); + set_type(types, &instr->src[0], ty); + break; + } + case nir_intrinsic_decl_reg: + if (nir_intrinsic_bit_size(instr) == 1) + set_type(types, &instr->def, TYPE_BOOL); + else + set_type(types, &instr->def, TYPE_NONE); + break; + case nir_intrinsic_store_reg: + set_type(types, &instr->src[0], TYPE_NONE); + set_type(types, &instr->src[1], TYPE_NONE); + break; + case nir_intrinsic_load_reg: + set_type(types, &instr->src[0], TYPE_NONE); + set_type(types, &instr->def, TYPE_NONE); + break; + case nir_intrinsic_load_scratch: + case nir_intrinsic_load_shared: + set_type(types, &instr->def, TYPE_UINT); + set_type(types, &instr->src[0], TYPE_UINT); + break; + case nir_intrinsic_store_scratch: + case nir_intrinsic_store_shared: + set_type(types, &instr->src[0], TYPE_UINT); + set_type(types, &instr->src[1], TYPE_UINT); + break; + case nir_intrinsic_load_workgroup_id: + case nir_intrinsic_load_subgroup_id: + case nir_intrinsic_load_local_invocation_id: + case nir_intrinsic_load_global_invocation_id: + case nir_intrinsic_load_num_workgroups: + case nir_intrinsic_load_num_subgroups: + case nir_intrinsic_load_subgroup_size: + case nir_intrinsic_load_sample_id: + case nir_intrinsic_load_sample_mask: + case nir_intrinsic_load_subgroup_invocation: + case nir_intrinsic_load_amplification_id_kk: + 
set_type(types, &instr->def, TYPE_UINT); + break; + case nir_intrinsic_load_vulkan_descriptor: + set_type(types, &instr->src[0], TYPE_UINT); + set_type(types, &instr->def, TYPE_UINT); + break; + case nir_intrinsic_load_buffer_ptr_kk: + set_type(types, &instr->def, TYPE_UINT); + break; + // The defs of these instructions don't participate in type inference + // but their sources are pointers (i.e. uints). + case nir_intrinsic_load_texture_handle_kk: + case nir_intrinsic_load_depth_texture_kk: + set_type(types, &instr->src[0], TYPE_UINT); + break; + case nir_intrinsic_load_sampler_handle_kk: + set_type(types, &instr->def, TYPE_SAMPLER); + break; + case nir_intrinsic_ddx: + case nir_intrinsic_ddy: + case nir_intrinsic_ddx_coarse: + case nir_intrinsic_ddy_coarse: + case nir_intrinsic_ddx_fine: + case nir_intrinsic_ddy_fine: + set_type(types, &instr->src[0], TYPE_FLOAT); + set_type(types, &instr->def, TYPE_FLOAT); + break; + case nir_intrinsic_load_point_coord: + set_type(types, &instr->def, TYPE_FLOAT); + break; + case nir_intrinsic_load_front_face: + case nir_intrinsic_elect: + case nir_intrinsic_load_helper_invocation: + case nir_intrinsic_is_helper_invocation: + set_type(types, &instr->def, TYPE_BOOL); + break; + case nir_intrinsic_load_constant_agx: + set_type(types, &instr->src[0], TYPE_UINT); + set_type(types, &instr->src[1], TYPE_UINT); + set_type(types, &instr->def, + ti_type_from_pipe_format(nir_intrinsic_format(instr))); + break; + case nir_intrinsic_bindless_image_load: + set_type(types, &instr->def, + ti_type_from_nir(nir_intrinsic_dest_type(instr))); + set_type(types, &instr->src[1], TYPE_UINT); // coords + set_type(types, &instr->src[3], TYPE_UINT); // level + break; + case nir_intrinsic_bindless_image_store: + set_type(types, &instr->src[1], TYPE_UINT); // coords + set_type(types, &instr->src[3], + ti_type_from_nir(nir_intrinsic_src_type(instr))); + set_type(types, &instr->src[4], TYPE_UINT); // level + break; + case nir_intrinsic_demote_if: + case nir_intrinsic_terminate_if: + set_type(types, &instr->src[0], TYPE_BOOL); + break; + case nir_intrinsic_bindless_image_atomic: + case nir_intrinsic_bindless_image_atomic_swap: { + set_type(types, &instr->src[1], TYPE_UINT); // coords + set_type(types, &instr->src[2], TYPE_UINT); // level + ti_type type = + ti_type_from_nir(nir_atomic_op_type(nir_intrinsic_atomic_op(instr))); + set_type(types, &instr->src[3], type); + if (instr->intrinsic == nir_intrinsic_bindless_image_atomic_swap) + set_type(types, &instr->src[4], type); + set_type(types, &instr->def, type); + break; + } + case nir_intrinsic_ballot: + set_type(types, &instr->src[0], TYPE_BOOL); + set_type(types, &instr->def, TYPE_UINT); + break; + case nir_intrinsic_vote_all: + case nir_intrinsic_vote_any: + set_type(types, &instr->src[0], TYPE_BOOL); + set_type(types, &instr->def, TYPE_BOOL); + break; + case nir_intrinsic_read_first_invocation: + case nir_intrinsic_quad_swap_horizontal: + case nir_intrinsic_quad_swap_vertical: + case nir_intrinsic_quad_swap_diagonal: + set_type(types, &instr->src[0], TYPE_GENERIC_DATA); + set_type(types, &instr->def, TYPE_GENERIC_DATA); + break; + case nir_intrinsic_read_invocation: + case nir_intrinsic_quad_broadcast: + case nir_intrinsic_shuffle: + case nir_intrinsic_shuffle_down: + case nir_intrinsic_shuffle_up: + case nir_intrinsic_shuffle_xor: + set_type(types, &instr->src[0], TYPE_GENERIC_DATA); + set_type(types, &instr->def, TYPE_GENERIC_DATA); + set_type(types, &instr->src[1], TYPE_UINT); + break; + case nir_intrinsic_reduce: + switch 
(nir_intrinsic_reduction_op(instr)) { + case nir_op_iand: + case nir_op_ior: + case nir_op_ixor: + case nir_op_iadd: + case nir_op_imul: + set_type(types, &instr->src[0], TYPE_GENERIC_INT); + set_type(types, &instr->def, TYPE_GENERIC_INT); + break; + case nir_op_imax: + case nir_op_imin: + set_type(types, &instr->src[0], TYPE_INT); + set_type(types, &instr->def, TYPE_INT); + break; + case nir_op_umax: + case nir_op_umin: + set_type(types, &instr->src[0], TYPE_UINT); + set_type(types, &instr->def, TYPE_UINT); + break; + case nir_op_fadd: + case nir_op_fmax: + case nir_op_fmin: + case nir_op_fmul: + set_type(types, &instr->src[0], TYPE_FLOAT); + set_type(types, &instr->def, TYPE_FLOAT); + break; + default: + break; + } + break; + default: + break; + } +} + +static void +infer_types_from_tex(struct hash_table *types, nir_tex_instr *tex) +{ + set_type(types, &tex->def, ti_type_from_nir(tex->dest_type)); + for (int i = 0; i < tex->num_srcs; i++) { + nir_src *src = &tex->src[i].src; + switch (tex->src[i].src_type) { + case nir_tex_src_coord: + if (tex->op == nir_texop_txf || tex->op == nir_texop_txf_ms) + set_type(types, src, TYPE_UINT); + else + set_type(types, src, TYPE_FLOAT); + break; + case nir_tex_src_comparator: + set_type(types, src, TYPE_FLOAT); + break; + case nir_tex_src_offset: + set_type(types, src, TYPE_INT); + break; + case nir_tex_src_bias: + set_type(types, src, TYPE_FLOAT); + break; + case nir_tex_src_lod: + if (tex->op == nir_texop_txf || tex->op == nir_texop_txf_ms || + tex->op == nir_texop_txs) + set_type(types, src, TYPE_UINT); + else + set_type(types, src, TYPE_FLOAT); + break; + case nir_tex_src_min_lod: + set_type(types, src, TYPE_FLOAT); + break; + case nir_tex_src_ms_index: + set_type(types, src, TYPE_UINT); + break; + case nir_tex_src_ddx: + case nir_tex_src_ddy: + set_type(types, src, TYPE_FLOAT); + break; + default: + break; + } + } +} + +static void +infer_types_from_instr(struct hash_table *types, nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_alu: + infer_types_from_alu(types, nir_instr_as_alu(instr)); + return; + case nir_instr_type_intrinsic: + infer_types_from_intrinsic(types, nir_instr_as_intrinsic(instr)); + return; + case nir_instr_type_tex: + infer_types_from_tex(types, nir_instr_as_tex(instr)); + break; + default: + break; + } +} + +static bool +propagate_types(struct hash_table *types, nir_instr *instr) +{ + bool progress = false; + switch (instr->type) { + case nir_instr_type_alu: { + nir_alu_instr *alu = nir_instr_as_alu(instr); + nir_op_info info = nir_op_infos[alu->op]; + for (int i = 0; i < info.num_inputs; i++) { + ti_type src_type = get_type(types, &alu->src[i].src); + ti_type def_type = get_type(types, alu->src[i].src.ssa); + ti_type unified_type = unify_types(src_type, def_type); + nir_instr *parent_instr = alu->src[i].src.ssa->parent_instr; + if (unified_type > src_type) { + progress |= update_instr_type(types, instr, unified_type); + } else if (unified_type > def_type) { + progress |= update_instr_type(types, parent_instr, unified_type); + } + } + break; + } + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + nir_intrinsic_info info = nir_intrinsic_infos[intr->intrinsic]; + for (int i = 0; i < info.num_srcs; i++) { + ti_type src_type = get_type(types, &intr->src[i]); + ti_type def_type = get_type(types, intr->src[i].ssa); + ti_type unified_type = unify_types(src_type, def_type); + nir_instr *parent_instr = intr->src[i].ssa->parent_instr; + if (unified_type > src_type) { + 
progress |= update_instr_type(types, instr, unified_type); + } else if (unified_type > def_type) { + progress |= update_instr_type(types, parent_instr, unified_type); + } + } + break; + } + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + for (int i = 0; i < tex->num_srcs; i++) { + ti_type src_type = get_type(types, &tex->src[i].src); + ti_type def_type = get_type(types, tex->src[i].src.ssa); + ti_type unified_type = unify_types(src_type, def_type); + if (src_type == 0) + continue; + nir_instr *parent_instr = tex->src[i].src.ssa->parent_instr; + if (unified_type > def_type) { + progress |= update_instr_type(types, parent_instr, unified_type); + } + } + break; + } + default: + break; + } + return progress; +} + +static const char *float_names[] = {"float", "float2", "float3", "float4"}; +static const char *half_names[] = {"half", "half2", "half3", "half4"}; +static const char *bool_names[] = {"bool", "bool2", "bool3", "bool4"}; +static const char *int8_names[] = {"char", "char2", "char3", "char4"}; +static const char *uint8_names[] = {"uchar", "uchar2", "uchar3", "uchar4"}; +static const char *int16_names[] = {"short", "short2", "short3", "short4"}; +static const char *uint16_names[] = {"ushort", "ushort2", "ushort3", "ushort4"}; +static const char *int32_names[] = {"int", "int2", "int3", "int4"}; +static const char *uint32_names[] = {"uint", "uint2", "uint3", "uint4"}; +static const char *int64_names[] = {"long", "long2", "long3", "long4"}; +static const char *uint64_names[] = {"ulong", "ulong2", "ulong3", "ulong4"}; + +static const char * +ti_type_to_msl_type(ti_type type, uint8_t bit_width, uint8_t num_components) +{ + switch (type) { + case TYPE_GENERIC_DATA: + case TYPE_GENERIC_INT: + case TYPE_GENERIC_INT_OR_BOOL: + case TYPE_UINT: + switch (bit_width) { + case 1: + return bool_names[num_components - 1]; + case 8: + return uint8_names[num_components - 1]; + case 16: + return uint16_names[num_components - 1]; + case 32: + return uint32_names[num_components - 1]; + case 64: + return uint64_names[num_components - 1]; + default: + assert(!"Bad uint length"); + } + break; + case TYPE_BOOL: + return bool_names[num_components - 1]; + case TYPE_INT: + switch (bit_width) { + case 8: + return int8_names[num_components - 1]; + case 16: + return int16_names[num_components - 1]; + case 32: + return int32_names[num_components - 1]; + case 64: + return int64_names[num_components - 1]; + default: + assert(!"Bad uint length"); + } + break; + case TYPE_FLOAT: + switch (bit_width) { + case 16: + return half_names[num_components - 1]; + case 32: + return float_names[num_components - 1]; + default: + assert(!"Bad float length"); + } + break; + case TYPE_SAMPLER: + return "sampler"; + default: + return NULL; + } + + return NULL; +} + +const char * +msl_uint_type(uint8_t bit_size, uint8_t num_components) +{ + return ti_type_to_msl_type(TYPE_UINT, bit_size, num_components); +} + +const char * +msl_type_for_def(struct hash_table *types, nir_def *def) +{ + ti_type type = get_type(types, def); + return ti_type_to_msl_type(type, def->bit_size, def->num_components); +} + +const char * +msl_type_for_src(struct hash_table *types, nir_src *src) +{ + ti_type type = get_type(types, src); + // This won't necessarily work for alu srcs but for intrinsics it's fine. 
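+   // (ALU sources carry swizzles, so the channels actually read can differ
+   // from src->ssa->num_components; intrinsic sources read the def as-is.)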
+ return ti_type_to_msl_type(type, src->ssa->bit_size, + src->ssa->num_components); +} + +const char * +msl_bitcast_for_src(struct hash_table *types, nir_src *src) +{ + ti_type src_type = get_type(types, src); + ti_type def_type = get_type(types, src->ssa); + if (nir_src_is_if(src)) + return NULL; + if (src_type != def_type) { + /* bool types cannot use as_type casting */ + if (src_type == TYPE_BOOL || def_type == TYPE_BOOL) + return NULL; + + // produce bitcast _into_ src_type + return ti_type_to_msl_type(src_type, src->ssa->bit_size, + src->ssa->num_components); + } else { + return NULL; + } +} + +static void +emit_src_component(struct nir_to_msl_ctx *ctx, nir_src *src, unsigned comp) +{ + ti_type type = get_type(ctx->types, src); + switch (type) { + case TYPE_FLOAT: { + double v = nir_src_comp_as_float(*src, comp); + if (isinf(v)) { + P(ctx, "(INFINITY"); + } else if (isnan(v)) { + P(ctx, "(NAN"); + } else { + /* Building the types explicitly is required since the MSL compiler is + * too dumb to understand that "max(as_type<int>(t53), -2147483648)" is + * not ambiguous since both are ints and there's no room for longs. + * From CTS test: + * dEQP-VK.renderpass.suballocation.multisample.r32_sint.samples_2 */ + if (src->ssa->bit_size == 16) { + P(ctx, "half("); + } else { + P(ctx, "float("); + } + P(ctx, "%.*le", DBL_DECIMAL_DIG, nir_src_comp_as_float(*src, comp)); + } + break; + } + case TYPE_BOOL: + P(ctx, "bool(%d", nir_src_comp_as_bool(*src, comp)); + break; + case TYPE_INT: + switch (src->ssa->bit_size) { + case 8: + P(ctx, "char("); + break; + case 16: + P(ctx, "short("); + break; + case 32: + P(ctx, "int("); + break; + case 64: + P(ctx, "long("); + break; + default: + UNREACHABLE("Incorrect bit_size for TYPE_INT"); + } + P(ctx, "%" PRId64, nir_src_comp_as_int(*src, comp)); + break; + case TYPE_UINT: + case TYPE_GENERIC_DATA: + case TYPE_GENERIC_INT: + case TYPE_GENERIC_INT_OR_BOOL: + switch (src->ssa->bit_size) { + case 8: + P(ctx, "uchar("); + break; + case 16: + P(ctx, "ushort("); + break; + case 32: + P(ctx, "uint("); + break; + case 64: + P(ctx, "ulong("); + break; + default: + UNREACHABLE("Incorrect bit_size for TYPE_UINT"); + } + P(ctx, "%" PRIu64 "u", nir_src_comp_as_uint(*src, comp)); + break; + case TYPE_NONE: + assert(0); + P(ctx, "UNTYPED!"); + break; + default: + return; + } + P(ctx, ")"); +} + +void +msl_src_as_const(struct nir_to_msl_ctx *ctx, nir_src *src) +{ + ti_type type = get_type(ctx->types, src); + if (src->ssa->num_components == 1) { + emit_src_component(ctx, src, 0); + } else { + P(ctx, "%s(", + ti_type_to_msl_type(type, src->ssa->bit_size, + src->ssa->num_components)); + for (int i = 0; i < src->ssa->num_components; i++) { + if (i) + P(ctx, ", "); + emit_src_component(ctx, src, i); + } + P(ctx, ")"); + } +} + +struct hash_table * +msl_infer_types(nir_shader *shader) +{ + struct hash_table *types = _mesa_pointer_hash_table_create(NULL); + bool progress = false; + // First, seed the types for every instruction for every source and def + nir_foreach_function_impl(impl, shader) { + nir_foreach_block(block, impl) { + nir_foreach_instr(instr, block) { + infer_types_from_instr(types, instr); + } + } + } + + do { + progress = false; + nir_foreach_function_impl(impl, shader) { + nir_foreach_block(block, impl) { + nir_foreach_instr(instr, block) { + progress |= propagate_types(types, instr); + } + } + } + } while (progress); + return types; +} + +bool +msl_src_is_float(struct nir_to_msl_ctx *ctx, nir_src *src) +{ + return get_type(ctx->types, src) == TYPE_FLOAT; +} + 
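+/* True when type inference resolved this def to a Metal sampler handle (set
+ * for nir_intrinsic_load_sampler_handle_kk). predeclare_ssa_values() uses it
+ * to skip the "= type(0)" initializer for sampler-typed temporaries.
+ */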
+bool +msl_def_is_sampler(struct nir_to_msl_ctx *ctx, nir_def *def) +{ + return get_type(ctx->types, def) == TYPE_SAMPLER; +} diff --git a/src/kosmickrisp/compiler/nir_to_msl.c b/src/kosmickrisp/compiler/nir_to_msl.c new file mode 100644 index 00000000000..51b96bb2c62 --- /dev/null +++ b/src/kosmickrisp/compiler/nir_to_msl.c @@ -0,0 +1,2051 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "nir_to_msl.h" +#include "msl_private.h" +#include "nir.h" + +static const char * +get_stage_string(mesa_shader_stage stage) +{ + switch (stage) { + case MESA_SHADER_VERTEX: + return "vertex"; + case MESA_SHADER_FRAGMENT: + return "fragment"; + case MESA_SHADER_COMPUTE: + return "kernel"; + default: + assert(0); + return ""; + } +} + +static const char * +get_entrypoint_name(nir_shader *shader) +{ + return nir_shader_get_entrypoint(shader)->function->name; +} + +static const char *sysval_table[SYSTEM_VALUE_MAX] = { + [SYSTEM_VALUE_SUBGROUP_SIZE] = + "uint gl_SubGroupSize [[threads_per_simdgroup]]", + [SYSTEM_VALUE_SUBGROUP_INVOCATION] = + "uint gl_SubGroupInvocation [[thread_index_in_simdgroup]]", + [SYSTEM_VALUE_NUM_SUBGROUPS] = + "uint gl_NumSubGroups [[simdgroups_per_threadgroup]]", + [SYSTEM_VALUE_SUBGROUP_ID] = + "uint gl_SubGroupID [[simdgroup_index_in_threadgroup]]", + [SYSTEM_VALUE_WORKGROUP_ID] = + "uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]", + [SYSTEM_VALUE_LOCAL_INVOCATION_ID] = + "uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]", + [SYSTEM_VALUE_GLOBAL_INVOCATION_ID] = + "uint3 gl_GlobalInvocationID [[thread_position_in_grid]]", + [SYSTEM_VALUE_NUM_WORKGROUPS] = + "uint3 gl_NumWorkGroups [[threadgroups_per_grid]]", + [SYSTEM_VALUE_LOCAL_INVOCATION_INDEX] = + "uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]", + [SYSTEM_VALUE_VERTEX_ID] = "uint gl_VertexID [[vertex_id]]", + [SYSTEM_VALUE_INSTANCE_ID] = "uint gl_InstanceID [[instance_id]]", + [SYSTEM_VALUE_BASE_INSTANCE] = "uint gl_BaseInstance [[base_instance]]", + [SYSTEM_VALUE_FRAG_COORD] = "float4 gl_FragCoord [[position]]", + [SYSTEM_VALUE_POINT_COORD] = "float2 gl_PointCoord [[point_coord]]", + [SYSTEM_VALUE_FRONT_FACE] = "bool gl_FrontFacing [[front_facing]]", + [SYSTEM_VALUE_LAYER_ID] = "uint gl_Layer [[render_target_array_index]]", + [SYSTEM_VALUE_SAMPLE_ID] = "uint gl_SampleID [[sample_id]]", + [SYSTEM_VALUE_SAMPLE_MASK_IN] = "uint gl_SampleMask [[sample_mask]]", + [SYSTEM_VALUE_AMPLIFICATION_ID_KK] = + "uint mtl_AmplificationID [[amplification_id]]", + /* These are functions and not shader input variables */ + [SYSTEM_VALUE_HELPER_INVOCATION] = "", +}; + +static void +emit_sysvals(struct nir_to_msl_ctx *ctx, nir_shader *shader) +{ + unsigned i; + BITSET_FOREACH_SET(i, shader->info.system_values_read, SYSTEM_VALUE_MAX) { + assert(sysval_table[i]); + if (sysval_table[i] && sysval_table[i][0]) + P_IND(ctx, "%s,\n", sysval_table[i]); + } +} + +static void +emit_inputs(struct nir_to_msl_ctx *ctx, nir_shader *shader) +{ + switch (shader->info.stage) { + case MESA_SHADER_FRAGMENT: + P_IND(ctx, "FragmentIn in [[stage_in]],\n"); + break; + default: + break; + } + P_IND(ctx, "constant Buffer &buf0 [[buffer(0)]],\n"); + P_IND(ctx, "constant SamplerTable &sampler_table [[buffer(1)]]\n"); +} + +static const char * +output_type(nir_shader *shader) +{ + switch (shader->info.stage) { + case MESA_SHADER_VERTEX: + return "VertexOut"; + case MESA_SHADER_FRAGMENT: + return "FragmentOut"; + default: + return "void"; + } +} + +static void 
+emit_local_vars(struct nir_to_msl_ctx *ctx, nir_shader *shader) +{ + if (shader->info.shared_size) { + P_IND(ctx, "threadgroup char shared_data[%d];\n", + shader->info.shared_size); + } + if (shader->scratch_size) { + P_IND(ctx, "uchar scratch[%d] = {0};\n", shader->scratch_size); + } + if (BITSET_TEST(shader->info.system_values_read, + SYSTEM_VALUE_HELPER_INVOCATION)) { + P_IND(ctx, "bool gl_HelperInvocation = simd_is_helper_thread();\n"); + } +} + +static bool +is_register(nir_def *def) +{ + return ((def->parent_instr->type == nir_instr_type_intrinsic) && + (nir_instr_as_intrinsic(def->parent_instr)->intrinsic == + nir_intrinsic_load_reg)); +} + +static void +writemask_to_msl(struct nir_to_msl_ctx *ctx, unsigned write_mask, + unsigned num_components) +{ + if (num_components != util_bitcount(write_mask)) { + P(ctx, "."); + for (unsigned i = 0; i < num_components; i++) + if ((write_mask >> i) & 1) + P(ctx, "%c", "xyzw"[i]); + } +} + +static void +src_to_msl(struct nir_to_msl_ctx *ctx, nir_src *src) +{ + /* Pointer types cannot use as_type casting */ + const char *bitcast = msl_bitcast_for_src(ctx->types, src); + if (nir_src_is_const(*src)) { + msl_src_as_const(ctx, src); + return; + } + if (nir_src_is_undef(*src)) { + if (src->ssa->num_components == 1) { + P(ctx, "00"); + } else { + P(ctx, "%s(", msl_type_for_src(ctx->types, src)); + for (int i = 0; i < src->ssa->num_components; i++) { + if (i) + P(ctx, ", "); + P(ctx, "00"); + } + P(ctx, ")"); + } + return; + } + + if (bitcast) + P(ctx, "as_type<%s>(", bitcast); + if (is_register(src->ssa)) { + nir_intrinsic_instr *instr = + nir_instr_as_intrinsic(src->ssa->parent_instr); + if (src->ssa->bit_size != 1u) { + P(ctx, "as_type<%s>(r%d)", msl_type_for_def(ctx->types, src->ssa), + instr->src[0].ssa->index); + } else { + P(ctx, "%s(r%d)", msl_type_for_def(ctx->types, src->ssa), + instr->src[0].ssa->index); + } + } else if (nir_src_is_const(*src)) { + msl_src_as_const(ctx, src); + } else { + P(ctx, "t%d", src->ssa->index); + } + if (bitcast) + P(ctx, ")"); +} + +static void +alu_src_to_msl(struct nir_to_msl_ctx *ctx, nir_alu_instr *instr, int srcn) +{ + nir_alu_src *src = &instr->src[srcn]; + src_to_msl(ctx, &src->src); + if (!nir_alu_src_is_trivial_ssa(instr, srcn) && + src->src.ssa->num_components > 1) { + int num_components = nir_src_num_components(src->src); + assert(num_components <= 4); + + P(ctx, "."); + for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) { + if (!nir_alu_instr_channel_used(instr, srcn, i)) + continue; + P(ctx, "%c", "xyzw"[src->swizzle[i]]); + } + } +} + +static void +alu_funclike(struct nir_to_msl_ctx *ctx, nir_alu_instr *instr, const char *name) +{ + const nir_op_info *info = &nir_op_infos[instr->op]; + P(ctx, "%s(", name); + for (int i = 0; i < info->num_inputs; i++) { + alu_src_to_msl(ctx, instr, i); + if (i < info->num_inputs - 1) + P(ctx, ", "); + } + P(ctx, ")"); +} + +static void +alu_to_msl(struct nir_to_msl_ctx *ctx, nir_alu_instr *instr) +{ + +#define ALU_BINOP(op) \ + do { \ + alu_src_to_msl(ctx, instr, 0); \ + P(ctx, " %s ", op); \ + alu_src_to_msl(ctx, instr, 1); \ + } while (0); + + switch (instr->op) { + case nir_op_isign: + alu_src_to_msl(ctx, instr, 0); + P(ctx, " == 0 ? 0.0 : (("); + alu_src_to_msl(ctx, instr, 0); + P(ctx, " < 0) ? 
-1 : 1)"); + break; + case nir_op_iadd: + case nir_op_fadd: + ALU_BINOP("+"); + break; + case nir_op_uadd_sat: + case nir_op_iadd_sat: + alu_funclike(ctx, instr, "addsat"); + break; + case nir_op_isub: + case nir_op_fsub: + ALU_BINOP("-"); + break; + case nir_op_imul: + case nir_op_fmul: + ALU_BINOP("*"); + break; + case nir_op_idiv: + case nir_op_udiv: + case nir_op_fdiv: + ALU_BINOP("/"); + break; + case nir_op_irem: + ALU_BINOP("%"); + break; + case nir_op_ishl: + ALU_BINOP("<<"); + break; + case nir_op_ishr: + case nir_op_ushr: + ALU_BINOP(">>"); + break; + case nir_op_ige: + case nir_op_uge: + case nir_op_fge: + ALU_BINOP(">="); + break; + case nir_op_ilt: + case nir_op_ult: + case nir_op_flt: + ALU_BINOP("<") + break; + case nir_op_iand: + ALU_BINOP("&"); + break; + case nir_op_ior: + ALU_BINOP("|"); + break; + case nir_op_ixor: + ALU_BINOP("^"); + break; + case nir_op_bitfield_insert: + alu_funclike(ctx, instr, "insert_bits"); + break; + case nir_op_ibitfield_extract: + case nir_op_ubitfield_extract: + alu_funclike(ctx, instr, "extract_bits"); + break; + case nir_op_bitfield_reverse: + alu_funclike(ctx, instr, "reverse_bits"); + break; + case nir_op_bit_count: + alu_funclike(ctx, instr, "popcount"); + break; + case nir_op_uclz: + alu_funclike(ctx, instr, "clz"); + break; + case nir_op_ieq: + case nir_op_feq: + ALU_BINOP("=="); + break; + case nir_op_ine: + case nir_op_fneu: + ALU_BINOP("!="); + break; + case nir_op_umax: + case nir_op_imax: + alu_funclike(ctx, instr, "max"); + break; + case nir_op_umin: + case nir_op_imin: + alu_funclike(ctx, instr, "min"); + break; + case nir_op_umod: + case nir_op_imod: + ALU_BINOP("%"); + break; + case nir_op_imul_high: + case nir_op_umul_high: + alu_funclike(ctx, instr, "mulhi"); + break; + case nir_op_usub_sat: + alu_funclike(ctx, instr, "subsat"); + break; + case nir_op_fsat: + alu_funclike(ctx, instr, "saturate"); + break; + /* Functions from <metal_relational> */ + case nir_op_fisfinite: + alu_funclike(ctx, instr, "isfinite"); + break; + case nir_op_fisnormal: + alu_funclike(ctx, instr, "isnormal"); + break; + /* Functions from <metal_math> */ + case nir_op_iabs: + case nir_op_fabs: + alu_funclike(ctx, instr, "abs"); + break; + case nir_op_fceil: + alu_funclike(ctx, instr, "ceil"); + break; + case nir_op_fcos: + alu_funclike(ctx, instr, "cos"); + break; + case nir_op_fdot2: + case nir_op_fdot3: + case nir_op_fdot4: + alu_funclike(ctx, instr, "dot"); + break; + case nir_op_fexp2: + alu_funclike(ctx, instr, "exp2"); + break; + case nir_op_ffloor: + alu_funclike(ctx, instr, "floor"); + break; + case nir_op_ffma: + alu_funclike(ctx, instr, "fma"); + break; + case nir_op_ffract: + alu_funclike(ctx, instr, "fract"); + break; + case nir_op_flog2: + alu_funclike(ctx, instr, "log2"); + break; + case nir_op_flrp: + alu_funclike(ctx, instr, "mix"); + break; + case nir_op_fmax: + alu_funclike(ctx, instr, "fmax"); + break; + case nir_op_fmin: + alu_funclike(ctx, instr, "fmin"); + break; + case nir_op_frem: + alu_funclike(ctx, instr, "fmod"); + break; + case nir_op_fpow: + alu_funclike(ctx, instr, "pow"); + break; + case nir_op_fround_even: + alu_funclike(ctx, instr, "rint"); + break; + case nir_op_frsq: + alu_funclike(ctx, instr, "rsqrt"); + break; + case nir_op_fsign: + alu_funclike(ctx, instr, "sign"); + break; + case nir_op_fsqrt: + alu_funclike(ctx, instr, "sqrt"); + break; + case nir_op_fsin: + alu_funclike(ctx, instr, "sin"); + break; + case nir_op_ldexp: + alu_funclike(ctx, instr, "ldexp"); + break; + case nir_op_ftrunc: + alu_funclike(ctx, instr, 
"trunc"); + break; + case nir_op_pack_snorm_4x8: + alu_funclike(ctx, instr, "pack_float_to_snorm4x8"); + break; + case nir_op_pack_unorm_4x8: + alu_funclike(ctx, instr, "pack_float_to_unorm4x8"); + break; + case nir_op_pack_snorm_2x16: + alu_funclike(ctx, instr, "pack_float_to_snorm2x16"); + break; + case nir_op_pack_unorm_2x16: + alu_funclike(ctx, instr, "pack_float_to_unorm2x16"); + break; + case nir_op_unpack_snorm_4x8: + alu_funclike(ctx, instr, "unpack_snorm4x8_to_float"); + break; + case nir_op_unpack_unorm_4x8: + alu_funclike(ctx, instr, "unpack_unorm4x8_to_float"); + break; + case nir_op_unpack_snorm_2x16: + alu_funclike(ctx, instr, "unpack_snorm2x16_to_float"); + break; + case nir_op_unpack_unorm_2x16: + alu_funclike(ctx, instr, "unpack_unorm2x16_to_float"); + break; + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + case nir_op_b2b1: + case nir_op_b2b32: + case nir_op_b2i8: + case nir_op_b2i16: + case nir_op_b2i32: + case nir_op_b2i64: + case nir_op_b2f16: + case nir_op_i2f16: + case nir_op_u2f16: + case nir_op_i2f32: + case nir_op_u2f32: + case nir_op_i2i8: + case nir_op_i2i16: + case nir_op_i2i32: + case nir_op_i2i64: + case nir_op_f2i8: + case nir_op_f2i16: + case nir_op_f2i32: + case nir_op_f2i64: + case nir_op_f2u8: + case nir_op_f2u16: + case nir_op_f2u32: + case nir_op_f2u64: + case nir_op_u2u8: + case nir_op_u2u16: + case nir_op_u2u32: + case nir_op_u2u64: + case nir_op_f2f16: + case nir_op_f2f16_rtne: + case nir_op_f2f32: + alu_funclike(ctx, instr, msl_type_for_def(ctx->types, &instr->def)); + break; + case nir_op_unpack_half_2x16_split_x: + P(ctx, "float(as_type<half>(ushort(t%d & 0x0000ffff)))", + instr->src[0].src.ssa->index); + break; + case nir_op_frcp: + P(ctx, "1/"); + alu_src_to_msl(ctx, instr, 0); + break; + case nir_op_inot: + if (instr->src[0].src.ssa->bit_size == 1) { + P(ctx, "!"); + } else { + P(ctx, "~"); + } + alu_src_to_msl(ctx, instr, 0); + break; + case nir_op_ineg: + case nir_op_fneg: + P(ctx, "-"); + alu_src_to_msl(ctx, instr, 0); + break; + case nir_op_mov: + alu_src_to_msl(ctx, instr, 0); + break; + case nir_op_b2f32: + alu_src_to_msl(ctx, instr, 0); + P(ctx, " ? 1.0 : 0.0"); + break; + case nir_op_bcsel: + alu_src_to_msl(ctx, instr, 0); + P(ctx, " ? 
"); + alu_src_to_msl(ctx, instr, 1); + P(ctx, " : "); + alu_src_to_msl(ctx, instr, 2); + break; + default: + P(ctx, "ALU %s", nir_op_infos[instr->op].name); + } +} + +static const char * +texture_dim(enum glsl_sampler_dim dim) +{ + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return "1d"; + case GLSL_SAMPLER_DIM_2D: + return "2d"; + case GLSL_SAMPLER_DIM_3D: + return "3d"; + case GLSL_SAMPLER_DIM_CUBE: + return "cube"; + case GLSL_SAMPLER_DIM_BUF: + return "_buffer"; + case GLSL_SAMPLER_DIM_MS: + return "2d_ms"; + default: + fprintf(stderr, "Bad texture dim %d\n", dim); + assert(!"Bad texture dimension"); + return "BAD"; + } +} + +static const char * +tex_type_name(nir_alu_type ty) +{ + switch (ty) { + case nir_type_int16: + return "short"; + case nir_type_int32: + return "int"; + case nir_type_uint16: + return "ushort"; + case nir_type_uint32: + return "uint"; + case nir_type_float16: + return "half"; + case nir_type_float32: + return "float"; + default: + return "BAD"; + } +} + +static bool +instrinsic_needs_dest_type(nir_intrinsic_instr *instr) +{ + const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; + nir_intrinsic_op op = instr->intrinsic; + if (op == nir_intrinsic_decl_reg || op == nir_intrinsic_load_reg || + op == nir_intrinsic_load_texture_handle_kk || + op == nir_intrinsic_load_depth_texture_kk || + /* Atomic swaps have a custom codegen */ + op == nir_intrinsic_global_atomic_swap || + op == nir_intrinsic_shared_atomic_swap || + op == nir_intrinsic_bindless_image_atomic_swap) + return false; + return info->has_dest; +} + +static const char * +msl_pipe_format_to_msl_type(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_R16_FLOAT: + return "half"; + case PIPE_FORMAT_R32_FLOAT: + return "float"; + case PIPE_FORMAT_R8_UINT: + return "uchar"; + case PIPE_FORMAT_R16_UINT: + return "ushort"; + case PIPE_FORMAT_R32_UINT: + return "uint"; + case PIPE_FORMAT_R64_UINT: + return "unsigned long"; + case PIPE_FORMAT_R8_SINT: + return "char"; + case PIPE_FORMAT_R16_SINT: + return "short"; + case PIPE_FORMAT_R32_SINT: + return "int"; + case PIPE_FORMAT_R64_SINT: + return "long"; + default: + assert(0); + return ""; + } +} + +static const char * +component_str(uint8_t num_components) +{ + switch (num_components) { + default: + case 1: + return ""; + case 2: + return "2"; + case 3: + return "3"; + case 4: + return "4"; + } +} + +static void +round_src_component_to_uint(struct nir_to_msl_ctx *ctx, nir_src *src, + char component) +{ + bool is_float = msl_src_is_float(ctx, src); + if (is_float) { + P(ctx, "uint(rint("); + } + src_to_msl(ctx, src); + P(ctx, ".%c", component); + if (is_float) { + P(ctx, "))"); + } +} + +static void +texture_src_coord_swizzle(struct nir_to_msl_ctx *ctx, nir_src *coord, + uint32_t num_components, bool is_cube, bool is_array) +{ + src_to_msl(ctx, coord); + + uint32_t coord_components = + num_components - (uint32_t)is_array - (uint32_t)is_cube; + if (coord_components < coord->ssa->num_components) { + const char *swizzle = "xyzw"; + uint32_t i = 0; + P(ctx, "."); + for (i = 0; i < coord_components; i++) + P(ctx, "%c", swizzle[i]); + + if (is_cube) { + P(ctx, ", "); + round_src_component_to_uint(ctx, coord, swizzle[i++]); + } + if (is_array) { + P(ctx, ", "); + round_src_component_to_uint(ctx, coord, swizzle[i++]); + } + } +} + +static void +image_coord_swizzle(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr) +{ + unsigned comps = 0; + bool is_array = nir_intrinsic_image_array(instr); + bool is_cube = false; + switch 
(nir_intrinsic_image_dim(instr)) { + case GLSL_SAMPLER_DIM_BUF: + case GLSL_SAMPLER_DIM_1D: + comps = 1; + break; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_MS: + comps = 2; + break; + case GLSL_SAMPLER_DIM_3D: + comps = 3; + break; + case GLSL_SAMPLER_DIM_CUBE: + comps = 3; + is_cube = true; + break; + default: + assert(!"Bad dimension for image"); + break; + } + if (is_array) + comps += 1; + + texture_src_coord_swizzle(ctx, &instr->src[1], comps, is_cube, is_array); +} + +/* Non-packed types have stricter alignment requirements that packed types. + * This helps us build a packed format for storage. + */ +static void +src_to_packed(struct nir_to_msl_ctx *ctx, nir_src *src, const char *type, + uint32_t component_count) +{ + if (component_count == 1) { + P(ctx, "%s(", type); + } else { + P(ctx, "packed_%s(", type); + } + src_to_msl(ctx, src); + P(ctx, ")"); +} + +/* Non-packed types have stricter alignment requirements that packed types. + * This helps us cast the pointer to a packed type and then it builds the + * non-packed type for Metal usage. + */ +static void +src_to_packed_load(struct nir_to_msl_ctx *ctx, nir_src *src, + const char *addressing, const char *type, + uint32_t component_count) +{ + if (component_count == 1) { + P(ctx, "*(%s %s*)(", addressing, type); + } else { + P(ctx, "%s(*(%s packed_%s*)", type, addressing, type); + } + src_to_msl(ctx, src); + P(ctx, ")"); +} + +/* Non-packed types have stricter alignment requirements that packed types. + * This helps us cast the pointer to a packed type and then it builds the + * non-packed type for Metal usage. + */ +static void +src_to_packed_load_offset(struct nir_to_msl_ctx *ctx, nir_src *src, + nir_src *offset, const char *addressing, + const char *type, uint32_t component_count) +{ + if (component_count == 1) { + P(ctx, "*(%s %s*)((", addressing, type); + } else { + P(ctx, "%s(*(%s packed_%s*)(", type, addressing, type); + } + src_to_msl(ctx, src); + P(ctx, " + "); + src_to_msl(ctx, offset); + P(ctx, "))"); +} + +/* Non-packed types have stricter alignment requirements that packed types. + * This helps us cast the pointer to a packed type for storage. 
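+ * For example, MSL float3 is 16-byte aligned while packed_float3 is 4-byte
+ * aligned, so going through the packed type keeps stores that are only
+ * aligned to the scalar size legal.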
+ */ +static void +src_to_packed_store(struct nir_to_msl_ctx *ctx, nir_src *src, + const char *addressing, const char *type, + uint32_t num_components) +{ + if (num_components == 1) { + P_IND(ctx, "*(%s %s*)", addressing, type); + } else { + P_IND(ctx, "*(%s packed_%s*)", addressing, type); + } + src_to_msl(ctx, src); +} + +static const char * +atomic_op_to_msl(nir_atomic_op op) +{ + switch (op) { + case nir_atomic_op_iadd: + case nir_atomic_op_fadd: + return "atomic_fetch_add"; + case nir_atomic_op_umin: + case nir_atomic_op_imin: + case nir_atomic_op_fmin: + return "atomic_fetch_min"; + case nir_atomic_op_umax: + case nir_atomic_op_imax: + case nir_atomic_op_fmax: + return "atomic_fetch_max"; + case nir_atomic_op_iand: + return "atomic_fetch_and"; + case nir_atomic_op_ior: + return "atomic_fetch_or"; + case nir_atomic_op_ixor: + return "atomic_fetch_xor"; + case nir_atomic_op_xchg: + return "atomic_exchange"; + case nir_atomic_op_cmpxchg: + case nir_atomic_op_fcmpxchg: + return "atomic_compare_exchange_weak"; + default: + UNREACHABLE("Unhandled atomic op"); + } +} + +static void +atomic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr, + const char *scope, bool shared) +{ + const char *atomic_op = atomic_op_to_msl(nir_intrinsic_atomic_op(instr)); + const char *mem_order = "memory_order_relaxed"; + + P(ctx, "%s_explicit((%s atomic_%s*)", atomic_op, scope, + msl_type_for_def(ctx->types, &instr->def)); + if (shared) + P(ctx, "&shared_data["); + src_to_msl(ctx, &instr->src[0]); + if (shared) + P(ctx, "]"); + P(ctx, ", "); + src_to_msl(ctx, &instr->src[1]); + P(ctx, ", %s", mem_order); + P(ctx, ");\n"); +} + +static void +atomic_swap_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr, + const char *scope, bool shared) +{ + const char *atomic_op = atomic_op_to_msl(nir_intrinsic_atomic_op(instr)); + const char *mem_order = "memory_order_relaxed"; + const char *type = msl_type_for_def(ctx->types, &instr->def); + + P_IND(ctx, "%s ta%d = ", type, instr->def.index); + src_to_msl(ctx, &instr->src[1]); + P(ctx, "; %s_explicit((%s atomic_%s*)", atomic_op, scope, type); + if (shared) + P(ctx, "&shared_data["); + src_to_msl(ctx, &instr->src[0]); + if (shared) + P(ctx, "]"); + P(ctx, ", "); + P(ctx, "&ta%d, ", instr->def.index); + src_to_msl(ctx, &instr->src[2]); + P(ctx, ", %s, %s);", mem_order, mem_order); + P(ctx, "%s t%d = ta%d;\n", type, instr->def.index, instr->def.index); +} + +static void +memory_modes_to_msl(struct nir_to_msl_ctx *ctx, nir_variable_mode modes) +{ + bool requires_or = false; + u_foreach_bit(i, modes) { + nir_variable_mode single_mode = (1 << i); + if (requires_or) + P(ctx, " | "); + switch (single_mode) { + case nir_var_image: + P(ctx, "mem_flags::mem_texture"); + break; + case nir_var_mem_ssbo: + case nir_var_mem_global: + P(ctx, "mem_flags::mem_device"); + break; + case nir_var_function_temp: + P(ctx, "mem_flags::mem_none"); + break; + case nir_var_mem_shared: + P(ctx, "mem_flags::mem_threadgroup"); + break; + default: + UNREACHABLE("bad_memory_mode"); + } + requires_or = true; + } +} + +static uint32_t +get_input_num_components(struct nir_to_msl_ctx *ctx, uint32_t location) +{ + return ctx->inputs_info[location].num_components; +} + +static uint32_t +get_output_num_components(struct nir_to_msl_ctx *ctx, uint32_t location) +{ + return ctx->outputs_info[location].num_components; +} + +static void +intrinsic_to_msl(struct nir_to_msl_ctx *ctx, nir_intrinsic_instr *instr) +{ + /* These instructions are only used to understand interpolation modes, they + * 
don't generate any code. */ + if (instr->intrinsic == nir_intrinsic_load_barycentric_pixel || + instr->intrinsic == nir_intrinsic_load_barycentric_centroid || + instr->intrinsic == nir_intrinsic_load_barycentric_sample) + return; + + const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; + if (instrinsic_needs_dest_type(instr)) { + P_IND(ctx, "t%d = ", instr->def.index); + } + switch (instr->intrinsic) { + case nir_intrinsic_decl_reg: { + const char *reg_type = msl_uint_type(nir_intrinsic_bit_size(instr), + nir_intrinsic_num_components(instr)); + P_IND(ctx, "%s r%d = %s(0);\n", reg_type, instr->def.index, reg_type); + } break; + case nir_intrinsic_load_reg: + // register loads get inlined into the uses + break; + case nir_intrinsic_store_reg: + P_IND(ctx, "r%d", instr->src[1].ssa->index); + writemask_to_msl(ctx, nir_intrinsic_write_mask(instr), + instr->num_components); + /* Registers don't store the component count, so get it from the value we + * are assigning */ + if (instr->src[0].ssa->bit_size == 1u) { + P(ctx, " = bool%s((", component_str(instr->num_components)); + } else if (nir_src_is_const(instr->src[0])) { + /* Const vector types already build the type */ + if (instr->src[0].ssa->num_components > 1) { + P(ctx, " = as_type<%s>((", + msl_uint_type(instr->src[0].ssa->bit_size, + instr->src[0].ssa->num_components)); + } else { + P(ctx, " = as_type<%s>(%s(", + msl_uint_type(instr->src[0].ssa->bit_size, + instr->src[0].ssa->num_components), + msl_type_for_src(ctx->types, &instr->src[0])); + } + } else { + P(ctx, " = as_type<%s>((", + msl_uint_type(instr->src[0].ssa->bit_size, + instr->src[0].ssa->num_components)); + } + src_to_msl(ctx, &instr->src[0]); + P(ctx, "));\n"); + break; + case nir_intrinsic_load_subgroup_size: + P(ctx, "gl_SubGroupSize;\n"); + break; + case nir_intrinsic_load_subgroup_invocation: + P(ctx, "gl_SubGroupInvocation;\n"); + break; + case nir_intrinsic_load_num_subgroups: + P(ctx, "gl_NumSubGroups;\n"); + break; + case nir_intrinsic_load_subgroup_id: + P(ctx, "gl_SubGroupID;\n"); + break; + case nir_intrinsic_load_workgroup_id: + P(ctx, "gl_WorkGroupID;\n"); + break; + case nir_intrinsic_load_local_invocation_id: + P(ctx, "gl_LocalInvocationID;\n"); + break; + case nir_intrinsic_load_global_invocation_id: + P(ctx, "gl_GlobalInvocationID;\n"); + break; + case nir_intrinsic_load_num_workgroups: + P(ctx, "gl_NumWorkGroups;\n"); + break; + case nir_intrinsic_load_local_invocation_index: + P(ctx, "gl_LocalInvocationIndex;\n"); + break; + case nir_intrinsic_load_frag_coord: + P(ctx, "gl_FragCoord;\n"); + break; + case nir_intrinsic_load_point_coord: + P(ctx, "gl_PointCoord;\n"); + break; + case nir_intrinsic_load_vertex_id: + P(ctx, "gl_VertexID;\n"); + break; + case nir_intrinsic_load_instance_id: + P(ctx, "gl_InstanceID;\n"); + break; + case nir_intrinsic_load_base_instance: + P(ctx, "gl_BaseInstance;\n"); + break; + case nir_intrinsic_load_helper_invocation: + P(ctx, "gl_HelperInvocation;\n"); + break; + case nir_intrinsic_is_helper_invocation: + P(ctx, "simd_is_helper_thread();\n"); + break; + case nir_intrinsic_ddx: + case nir_intrinsic_ddx_coarse: + case nir_intrinsic_ddx_fine: + P(ctx, "dfdx("); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ");\n"); + break; + case nir_intrinsic_ddy: + case nir_intrinsic_ddy_coarse: + case nir_intrinsic_ddy_fine: + P(ctx, "dfdy("); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ");\n"); + break; + case nir_intrinsic_load_front_face: + P(ctx, "gl_FrontFacing;\n"); + break; + case nir_intrinsic_load_layer_id: + P(ctx, 
"gl_Layer;\n"); + break; + case nir_intrinsic_load_sample_id: + P(ctx, "gl_SampleID;\n"); + break; + case nir_intrinsic_load_sample_mask_in: + P(ctx, "gl_SampleMask;\n"); + break; + case nir_intrinsic_load_amplification_id_kk: + P(ctx, "mtl_AmplificationID;\n"); + break; + case nir_intrinsic_load_interpolated_input: { + unsigned idx = nir_src_as_uint(instr->src[1u]); + nir_io_semantics io = nir_intrinsic_io_semantics(instr); + uint32_t component = nir_intrinsic_component(instr); + uint32_t location = io.location + idx; + P(ctx, "in.%s", msl_input_name(ctx, location)); + if (instr->num_components < get_input_num_components(ctx, location)) { + P(ctx, "."); + for (unsigned i = 0; i < instr->num_components; i++) + P(ctx, "%c", "xyzw"[component + i]); + } + P(ctx, ";\n"); + break; + } + case nir_intrinsic_load_input: { + unsigned idx = nir_src_as_uint(instr->src[0u]); + nir_io_semantics io = nir_intrinsic_io_semantics(instr); + uint32_t component = nir_intrinsic_component(instr); + uint32_t location = io.location + idx; + P(ctx, "in.%s", msl_input_name(ctx, location)); + if (instr->num_components < get_input_num_components(ctx, location)) { + P(ctx, "."); + for (unsigned i = 0; i < instr->num_components; i++) + P(ctx, "%c", "xyzw"[component + i]); + } + P(ctx, ";\n"); + break; + } + case nir_intrinsic_load_output: { + unsigned idx = nir_src_as_uint(instr->src[0]); + nir_io_semantics io = nir_intrinsic_io_semantics(instr); + P(ctx, "out.%s;\n", msl_output_name(ctx, io.location + idx)); + break; + } + case nir_intrinsic_store_output: { + uint32_t idx = nir_src_as_uint(instr->src[1]); + nir_io_semantics io = nir_intrinsic_io_semantics(instr); + uint32_t location = io.location + idx; + uint32_t write_mask = nir_intrinsic_write_mask(instr); + uint32_t component = nir_intrinsic_component(instr); + uint32_t dst_num_components = get_output_num_components(ctx, location); + uint32_t num_components = instr->num_components; + + P_IND(ctx, "out.%s", msl_output_name(ctx, location)); + if (dst_num_components > 1u) { + P(ctx, "."); + for (unsigned i = 0; i < num_components; i++) + if ((write_mask >> i) & 1) + P(ctx, "%c", "xyzw"[component + i]); + } + P(ctx, " = "); + src_to_msl(ctx, &instr->src[0]); + if (num_components > 1u) { + P(ctx, "."); + for (unsigned i = 0; i < num_components; i++) + if ((write_mask >> i) & 1) + P(ctx, "%c", "xyzw"[i]); + } + P(ctx, ";\n"); + break; + } + case nir_intrinsic_load_push_constant: { + const char *ty = msl_type_for_def(ctx->types, &instr->def); + assert(nir_intrinsic_base(instr) == 0); + P(ctx, "*((constant %s*)&buf.push_consts[", ty); + src_to_msl(ctx, &instr->src[0]); + P(ctx, "]);\n"); + break; + } + case nir_intrinsic_load_buffer_ptr_kk: + P(ctx, "(ulong)&buf%d.contents[0];\n", nir_intrinsic_binding(instr)); + break; + case nir_intrinsic_load_global: { + src_to_packed_load(ctx, &instr->src[0], "device", + msl_type_for_def(ctx->types, &instr->def), + instr->def.num_components); + P(ctx, ";\n"); + break; + } + case nir_intrinsic_load_global_constant: { + src_to_packed_load(ctx, &instr->src[0], "constant", + msl_type_for_def(ctx->types, &instr->def), + instr->def.num_components); + P(ctx, ";\n"); + break; + } + case nir_intrinsic_load_global_constant_bounded: { + src_to_msl(ctx, &instr->src[1]); + P(ctx, " < "); + src_to_msl(ctx, &instr->src[2]); + P(ctx, " ? 
"); + src_to_packed_load_offset(ctx, &instr->src[0], &instr->src[1], "constant", + msl_type_for_def(ctx->types, &instr->def), + instr->def.num_components); + P(ctx, " : 0;\n"); + break; + } + case nir_intrinsic_load_global_constant_offset: { + src_to_packed_load_offset(ctx, &instr->src[0], &instr->src[1], "device", + msl_type_for_def(ctx->types, &instr->def), + instr->def.num_components); + P(ctx, ";\n"); + break; + } + case nir_intrinsic_global_atomic: + atomic_to_msl(ctx, instr, "device", false); + break; + case nir_intrinsic_global_atomic_swap: + atomic_swap_to_msl(ctx, instr, "device", false); + break; + case nir_intrinsic_shared_atomic: + atomic_to_msl(ctx, instr, "threadgroup", true); + break; + case nir_intrinsic_shared_atomic_swap: + atomic_swap_to_msl(ctx, instr, "threadgroup", true); + break; + case nir_intrinsic_store_global: { + const char *type = msl_type_for_src(ctx->types, &instr->src[0]); + src_to_packed_store(ctx, &instr->src[1], "device", type, + instr->src[0].ssa->num_components); + writemask_to_msl(ctx, nir_intrinsic_write_mask(instr), + instr->num_components); + P(ctx, " = ") + src_to_packed(ctx, &instr->src[0], type, + instr->src[0].ssa->num_components); + P(ctx, ";\n"); + break; + } + case nir_intrinsic_barrier: { + mesa_scope execution_scope = nir_intrinsic_execution_scope(instr); + nir_variable_mode memory_modes = nir_intrinsic_memory_modes(instr); + if (execution_scope == SCOPE_SUBGROUP) { + P_IND(ctx, "simdgroup_barrier("); + memory_modes_to_msl(ctx, memory_modes); + } else if (execution_scope == SCOPE_WORKGROUP) { + P_IND(ctx, "threadgroup_barrier("); + memory_modes_to_msl(ctx, memory_modes); + } else if (execution_scope == SCOPE_NONE) { + /* Empty barrier */ + if (memory_modes == 0u) + break; + + P_IND(ctx, "atomic_thread_fence("); + memory_modes_to_msl(ctx, memory_modes); + P(ctx, ", memory_order_seq_cst, "); + switch (nir_intrinsic_memory_scope(instr)) { + case SCOPE_SUBGROUP: + P(ctx, "thread_scope::thread_scope_simdgroup"); + break; + case SCOPE_WORKGROUP: + /* TODO_KOSMICKRISP This if case should not be needed but we fail + * the following CTS tests otherwise: + * dEQP-VK.memory_model.*.ext.u32.*coherent.*.atomicwrite.workgroup.payload_*local.*.guard_local.*.comp + * The last two wild cards being either 'workgroup' or 'physbuffer' + */ + if (memory_modes & + (nir_var_mem_global | nir_var_mem_ssbo | nir_var_image)) { + P(ctx, "thread_scope::thread_scope_device"); + } else { + P(ctx, "thread_scope::thread_scope_threadgroup"); + } + + break; + case SCOPE_QUEUE_FAMILY: + case SCOPE_DEVICE: + P(ctx, "thread_scope::thread_scope_device"); + break; + default: + P(ctx, "bad_scope"); + assert(!"bad scope"); + break; + } + } else { + UNREACHABLE("bad_execution scope"); + } + P(ctx, ");\n"); + break; + } + case nir_intrinsic_demote: + P_IND(ctx, "discard_fragment();\n"); + break; + case nir_intrinsic_demote_if: + P_IND(ctx, "if (") + src_to_msl(ctx, &instr->src[0]); + P(ctx, ")\n"); + ctx->indentlevel++; + P_IND(ctx, "discard_fragment();\n"); + ctx->indentlevel--; + break; + case nir_intrinsic_terminate: + P_IND(ctx, "discard_fragment();\n"); + P_IND(ctx, "return {};\n"); + break; + case nir_intrinsic_terminate_if: + P_IND(ctx, "if (") + src_to_msl(ctx, &instr->src[0]); + P(ctx, ") {\n"); + ctx->indentlevel++; + P_IND(ctx, "discard_fragment();\n"); + P_IND(ctx, "return {};\n"); + ctx->indentlevel--; + P_IND(ctx, "}\n"); + break; + case nir_intrinsic_load_shared: + assert(nir_intrinsic_base(instr) == 0); + P(ctx, "*(threadgroup %s*)&shared_data[", + 
msl_type_for_def(ctx->types, &instr->def)); + src_to_msl(ctx, &instr->src[0]); + P(ctx, "];\n"); + break; + case nir_intrinsic_store_shared: + assert(nir_intrinsic_base(instr) == 0); + P_IND(ctx, "(*(threadgroup %s*)&shared_data[", + msl_type_for_src(ctx->types, &instr->src[0])); + src_to_msl(ctx, &instr->src[1]); + P(ctx, "])"); + writemask_to_msl(ctx, nir_intrinsic_write_mask(instr), + instr->num_components); + P(ctx, " = "); + src_to_msl(ctx, &instr->src[0]); + if (instr->src[0].ssa->num_components > 1) + writemask_to_msl(ctx, nir_intrinsic_write_mask(instr), + instr->num_components); + P(ctx, ";\n"); + break; + case nir_intrinsic_load_scratch: + P(ctx, "*(thread %s*)&scratch[", + msl_type_for_def(ctx->types, &instr->def)); + src_to_msl(ctx, &instr->src[0]); + P(ctx, "];\n"); + break; + case nir_intrinsic_store_scratch: + P_IND(ctx, "(*(thread %s*)&scratch[", + msl_type_for_src(ctx->types, &instr->src[0])); + src_to_msl(ctx, &instr->src[1]); + P(ctx, "])"); + writemask_to_msl(ctx, nir_intrinsic_write_mask(instr), + instr->num_components); + P(ctx, " = "); + src_to_msl(ctx, &instr->src[0]); + if (instr->src[0].ssa->num_components > 1) + writemask_to_msl(ctx, nir_intrinsic_write_mask(instr), + instr->num_components); + P(ctx, ";\n"); + break; + case nir_intrinsic_load_texture_handle_kk: { + const char *access = ""; + switch (nir_intrinsic_flags(instr)) { + case MSL_ACCESS_READ: + access = ", access::read"; + break; + case MSL_ACCESS_WRITE: + access = ", access::write"; + break; + case MSL_ACCESS_READ_WRITE: + access = ", access::read_write"; + break; + } + P_IND(ctx, "texture%s%s<%s%s> t%d = *(constant texture%s%s<%s%s>*)", + texture_dim(nir_intrinsic_image_dim(instr)), + nir_intrinsic_image_array(instr) ? "_array" : "", + tex_type_name(nir_intrinsic_dest_type(instr)), access, + instr->def.index, texture_dim(nir_intrinsic_image_dim(instr)), + nir_intrinsic_image_array(instr) ? "_array" : "", + tex_type_name(nir_intrinsic_dest_type(instr)), access); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ";\n"); + break; + } + case nir_intrinsic_load_depth_texture_kk: + P_IND(ctx, "depth%s%s<float> t%d = *(constant depth%s%s<float>*)", + texture_dim(nir_intrinsic_image_dim(instr)), + nir_intrinsic_image_array(instr) ? "_array" : "", instr->def.index, + texture_dim(nir_intrinsic_image_dim(instr)), + nir_intrinsic_image_array(instr) ? "_array" : ""); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ";\n"); + break; + case nir_intrinsic_load_sampler_handle_kk: + P(ctx, "sampler_table.handles["); + src_to_msl(ctx, &instr->src[0]); + P(ctx, "];\n"); + break; + case nir_intrinsic_load_constant_agx: { + const char *type = msl_type_for_def(ctx->types, &instr->def); + const char *no_component_type = + msl_pipe_format_to_msl_type(nir_intrinsic_format(instr)); + if (instr->def.num_components == 1) { + P(ctx, "(*(((constant %s*)", type); + } else { + P(ctx, "%s(*(constant packed_%s*)(((constant %s*)", type, type, + no_component_type); + } + src_to_msl(ctx, &instr->src[0]); + P(ctx, ") + "); + src_to_msl(ctx, &instr->src[1]); + P(ctx, "));\n"); + break; + } + case nir_intrinsic_bindless_image_load: + src_to_msl(ctx, &instr->src[0]); + P(ctx, ".read("); + image_coord_swizzle(ctx, instr); + if (nir_intrinsic_image_dim(instr) != GLSL_SAMPLER_DIM_BUF) { + P(ctx, ", "); + src_to_msl(ctx, &instr->src[3]); + } + /* read will always return vec4 and we may try to assign that to an uint + * which is illegal. 
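+    * Swizzle the read result down to the def's component count instead.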
*/ + P(ctx, ")."); + for (uint32_t i = 0u; i < instr->def.num_components; ++i) { + P(ctx, "%c", "xyzw"[i]); + } + P(ctx, ";\n"); + break; + case nir_intrinsic_bindless_image_store: + P_INDENT(ctx); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ".write("); + src_to_msl(ctx, &instr->src[3]); + P(ctx, ", "); + image_coord_swizzle(ctx, instr); + if (nir_intrinsic_image_dim(instr) != GLSL_SAMPLER_DIM_BUF) { + P(ctx, ", "); + src_to_msl(ctx, &instr->src[4]); + } + P(ctx, ");\n"); + break; + case nir_intrinsic_bindless_image_atomic: + src_to_msl(ctx, &instr->src[0]); + P(ctx, ".%s(", atomic_op_to_msl(nir_intrinsic_atomic_op(instr))); + image_coord_swizzle(ctx, instr); + P(ctx, ", "); + src_to_msl(ctx, &instr->src[3]); + P(ctx, ").x;\n"); + break; + case nir_intrinsic_bindless_image_atomic_swap: { + const char *type = msl_type_for_def(ctx->types, &instr->def); + P_IND(ctx, "%s4 ta%d = ", type, instr->def.index); + src_to_msl(ctx, &instr->src[3]); + P(ctx, "; "); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ".%s(", atomic_op_to_msl(nir_intrinsic_atomic_op(instr))); + image_coord_swizzle(ctx, instr); + P(ctx, ", &ta%d, ", instr->def.index); + src_to_msl(ctx, &instr->src[4]); + P(ctx, "); %s t%d = ta%d.x;\n", type, instr->def.index, instr->def.index); + break; + } + case nir_intrinsic_ballot: + P(ctx, "(ulong)simd_ballot("); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ");\n"); + break; + case nir_intrinsic_elect: + /* If we don't add && "(ulong)simd_ballot(true)"" the following CTS tests + * fail: + * dEQP-VK.subgroups.ballot_other.graphics.subgroupballotfindlsb + * dEQP-VK.subgroups.ballot_other.compute.subgroupballotfindlsb + * Weird Metal bug: + * if (simd_is_first()) + * temp = 3u; + * else + * temp = simd_ballot(true); <- This will return all active threads... 
+ */ + P(ctx, "simd_is_first() && (ulong)simd_ballot(true);\n"); + break; + case nir_intrinsic_read_first_invocation: + P(ctx, "simd_broadcast_first("); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ");\n"); + break; + case nir_intrinsic_read_invocation: + P(ctx, "simd_broadcast("); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ", "); + src_to_msl(ctx, &instr->src[1]); + P(ctx, ");"); + break; + case nir_intrinsic_shuffle: + P(ctx, "simd_shuffle("); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ", "); + src_to_msl(ctx, &instr->src[1]); + P(ctx, ");\n"); + break; + case nir_intrinsic_shuffle_xor: + P(ctx, "simd_shuffle_xor("); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ", "); + src_to_msl(ctx, &instr->src[1]); + P(ctx, ");\n"); + break; + case nir_intrinsic_shuffle_up: + P(ctx, "simd_shuffle_up("); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ", "); + src_to_msl(ctx, &instr->src[1]); + P(ctx, ");\n"); + break; + case nir_intrinsic_shuffle_down: + P(ctx, "simd_shuffle_down("); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ", "); + src_to_msl(ctx, &instr->src[1]); + P(ctx, ");\n"); + break; + + case nir_intrinsic_vote_all: + P(ctx, "simd_all("); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ");\n"); + break; + case nir_intrinsic_vote_any: + P(ctx, "simd_any("); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ");\n"); + break; + case nir_intrinsic_quad_broadcast: + P(ctx, "quad_broadcast("); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ", "); + src_to_msl(ctx, &instr->src[1]); + P(ctx, ");\n"); + break; + case nir_intrinsic_quad_swap_horizontal: + P(ctx, "quad_shuffle_xor("); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ", 1);\n"); + break; + case nir_intrinsic_quad_swap_vertical: + P(ctx, "quad_shuffle_xor("); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ", 2);\n"); + break; + case nir_intrinsic_quad_swap_diagonal: + P(ctx, "quad_shuffle_xor("); + src_to_msl(ctx, &instr->src[0]); + P(ctx, ", 3);\n"); + break; + case nir_intrinsic_reduce: + switch (nir_intrinsic_reduction_op(instr)) { + case nir_op_iadd: + case nir_op_fadd: + P(ctx, "simd_sum("); + break; + case nir_op_imul: + case nir_op_fmul: + P(ctx, "simd_product("); + break; + case nir_op_imin: + case nir_op_umin: + case nir_op_fmin: + P(ctx, "simd_min("); + break; + case nir_op_imax: + case nir_op_umax: + case nir_op_fmax: + P(ctx, "simd_max("); + break; + case nir_op_iand: + P(ctx, "simd_and("); + break; + case nir_op_ior: + P(ctx, "simd_or("); + break; + case nir_op_ixor: + P(ctx, "simd_xor("); + break; + default: + UNREACHABLE("Bad reduction op"); + } + + src_to_msl(ctx, &instr->src[0]); + P(ctx, ");\n"); + break; + default: + P_IND(ctx, "Unknown intrinsic %s\n", info->name); + } +} + +static nir_src * +nir_tex_get_src(struct nir_tex_instr *tex, nir_tex_src_type type) +{ + int idx = nir_tex_instr_src_index(tex, type); + if (idx == -1) + return NULL; + return &tex->src[idx].src; +} + +static void +tex_coord_swizzle(struct nir_to_msl_ctx *ctx, nir_tex_instr *tex) +{ + texture_src_coord_swizzle(ctx, nir_tex_get_src(tex, nir_tex_src_coord), + tex->coord_components, false, tex->is_array); +} + +static void +tex_to_msl(struct nir_to_msl_ctx *ctx, nir_tex_instr *tex) +{ + nir_src *texhandle = nir_tex_get_src(tex, nir_tex_src_texture_handle); + nir_src *sampler = nir_tex_get_src(tex, nir_tex_src_sampler_handle); + // Projectors have to be lowered away to regular arithmetic + assert(!nir_tex_get_src(tex, nir_tex_src_projector)); + + P_IND(ctx, "t%d = ", tex->def.index); + + switch (tex->op) { + case nir_texop_tex: + case nir_texop_txb: + case nir_texop_txl: + 
case nir_texop_txd: { + nir_src *bias = nir_tex_get_src(tex, nir_tex_src_bias); + nir_src *lod = nir_tex_get_src(tex, nir_tex_src_lod); + nir_src *ddx = nir_tex_get_src(tex, nir_tex_src_ddx); + nir_src *ddy = nir_tex_get_src(tex, nir_tex_src_ddy); + nir_src *min_lod_clamp = nir_tex_get_src(tex, nir_tex_src_min_lod); + nir_src *offset = nir_tex_get_src(tex, nir_tex_src_offset); + nir_src *comparator = nir_tex_get_src(tex, nir_tex_src_comparator); + src_to_msl(ctx, texhandle); + if (comparator) { + P(ctx, ".sample_compare("); + } else { + P(ctx, ".sample("); + } + src_to_msl(ctx, sampler); + P(ctx, ", "); + tex_coord_swizzle(ctx, tex); + if (comparator) { + P(ctx, ", "); + src_to_msl(ctx, comparator); + } + if (bias) { + P(ctx, ", bias("); + src_to_msl(ctx, bias); + P(ctx, ")"); + } + if (lod) { + P(ctx, ", level("); + src_to_msl(ctx, lod); + P(ctx, ")"); + } + if (ddx) { + P(ctx, ", gradient%s(", texture_dim(tex->sampler_dim)); + src_to_msl(ctx, ddx); + P(ctx, ", "); + src_to_msl(ctx, ddy); + P(ctx, ")"); + } + if (min_lod_clamp) { + P(ctx, ", min_lod_clamp("); + src_to_msl(ctx, min_lod_clamp); + P(ctx, ")"); + } + if (offset) { + P(ctx, ", "); + src_to_msl(ctx, offset); + } + P(ctx, ");\n"); + break; + } + case nir_texop_txf: { + src_to_msl(ctx, texhandle); + P(ctx, ".read("); + tex_coord_swizzle(ctx, tex); + nir_src *lod = nir_tex_get_src(tex, nir_tex_src_lod); + if (lod) { + P(ctx, ", "); + src_to_msl(ctx, lod); + } + P(ctx, ");\n"); + break; + } + case nir_texop_txf_ms: + src_to_msl(ctx, texhandle); + P(ctx, ".read("); + tex_coord_swizzle(ctx, tex); + P(ctx, ", "); + src_to_msl(ctx, nir_tex_get_src(tex, nir_tex_src_ms_index)); + P(ctx, ");\n"); + break; + case nir_texop_txs: { + nir_src *lod = nir_tex_get_src(tex, nir_tex_src_lod); + if (tex->def.num_components > 1u) { + P(ctx, "%s%d(", tex_type_name(tex->dest_type), + tex->def.num_components); + } else { + P(ctx, "%s(", tex_type_name(tex->dest_type)); + } + src_to_msl(ctx, texhandle); + P(ctx, ".get_width(") + if (lod && tex->sampler_dim != GLSL_SAMPLER_DIM_MS && + tex->sampler_dim != GLSL_SAMPLER_DIM_BUF) + src_to_msl(ctx, lod); + P(ctx, ")"); + if (tex->sampler_dim != GLSL_SAMPLER_DIM_1D && + tex->sampler_dim != GLSL_SAMPLER_DIM_BUF) { + P(ctx, ", "); + src_to_msl(ctx, texhandle); + P(ctx, ".get_height("); + if (lod && tex->sampler_dim != GLSL_SAMPLER_DIM_MS && + tex->sampler_dim != GLSL_SAMPLER_DIM_BUF) + src_to_msl(ctx, lod); + P(ctx, ")"); + } + if (tex->sampler_dim == GLSL_SAMPLER_DIM_3D) { + P(ctx, ", "); + src_to_msl(ctx, texhandle); + P(ctx, ".get_depth("); + if (lod) + src_to_msl(ctx, lod); + P(ctx, ")"); + } + if (tex->is_array) { + P(ctx, ", "); + src_to_msl(ctx, texhandle); + P(ctx, ".get_array_size()"); + } + P(ctx, ");\n") + break; + } + case nir_texop_query_levels: + src_to_msl(ctx, texhandle); + P(ctx, ".get_num_mip_levels();\n"); + break; + case nir_texop_tg4: { + nir_src *offset = nir_tex_get_src(tex, nir_tex_src_offset); + nir_src *comparator = nir_tex_get_src(tex, nir_tex_src_comparator); + src_to_msl(ctx, texhandle); + if (comparator) { + P(ctx, ".gather_compare("); + } else { + P(ctx, ".gather("); + } + src_to_msl(ctx, sampler); + P(ctx, ", "); + tex_coord_swizzle(ctx, tex); + if (comparator) { + P(ctx, ", "); + src_to_msl(ctx, comparator); + } + P(ctx, ", "); + if (offset) + src_to_msl(ctx, offset); + else + P(ctx, "int2(0)"); + + /* Non-depth textures require component */ + if (!comparator) { + P(ctx, ", component::%c", "xyzw"[tex->component]); + } + + P(ctx, ");\n"); + break; + } + + case 
nir_texop_texture_samples: + src_to_msl(ctx, texhandle); + P(ctx, ".get_num_samples();\n"); + break; + case nir_texop_lod: { + nir_src *coord = nir_tex_get_src(tex, nir_tex_src_coord); + nir_src *bias = nir_tex_get_src(tex, nir_tex_src_bias); + nir_src *min = nir_tex_get_src(tex, nir_tex_src_min_lod); + nir_src *max = nir_tex_get_src(tex, nir_tex_src_max_lod_kk); + P(ctx, "float2(round(clamp(") + src_to_msl(ctx, texhandle); + P(ctx, ".calculate_unclamped_lod("); + src_to_msl(ctx, sampler); + P(ctx, ", "); + src_to_msl(ctx, coord); + P(ctx, ") + "); + src_to_msl(ctx, bias); + P(ctx, ", "); + src_to_msl(ctx, min); + P(ctx, ", "); + src_to_msl(ctx, max); + P(ctx, ")), "); + src_to_msl(ctx, texhandle); + P(ctx, ".calculate_unclamped_lod("); + src_to_msl(ctx, sampler); + P(ctx, ", "); + src_to_msl(ctx, coord); + P(ctx, ")"); + P(ctx, ");\n"); + break; + } + default: + assert(!"Unsupported texture op"); + } +} + +static void +jump_instr_to_msl(struct nir_to_msl_ctx *ctx, nir_jump_instr *jump) +{ + switch (jump->type) { + case nir_jump_halt: + P_IND(ctx, "TODO: halt\n"); + assert(!"Unimplemented"); + break; + case nir_jump_break: + P_IND(ctx, "break;\n"); + break; + case nir_jump_continue: + P_IND(ctx, "continue;\n"); + break; + case nir_jump_return: + assert(!"functions should have been inlined by now"); + break; + case nir_jump_goto: + case nir_jump_goto_if: + assert(!"Unstructured control flow not supported"); + break; + } +} + +static void +instr_to_msl(struct nir_to_msl_ctx *ctx, nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_alu: { + nir_alu_instr *alu = nir_instr_as_alu(instr); + P_IND(ctx, "t%d = ", alu->def.index); + alu_to_msl(ctx, alu); + P(ctx, ";\n"); + break; + } + case nir_instr_type_deref: + assert(!"We should have lowered derefs by now"); + break; + case nir_instr_type_call: + assert(!"We should have inlined all functions by now"); + break; + case nir_instr_type_tex: + tex_to_msl(ctx, nir_instr_as_tex(instr)); + break; + case nir_instr_type_intrinsic: + intrinsic_to_msl(ctx, nir_instr_as_intrinsic(instr)); + break; + case nir_instr_type_load_const: + // consts get inlined into their uses + break; + case nir_instr_type_jump: + jump_instr_to_msl(ctx, nir_instr_as_jump(instr)); + break; + case nir_instr_type_undef: + // undefs get inlined into their uses (and we shouldn't see them hopefully) + break; + case nir_instr_type_phi: + case nir_instr_type_parallel_copy: + assert(!"NIR should be taken out of SSA"); + break; + } +} + +static void +cf_node_to_metal(struct nir_to_msl_ctx *ctx, nir_cf_node *node) +{ + switch (node->type) { + case nir_cf_node_block: { + nir_block *block = nir_cf_node_as_block(node); + nir_foreach_instr(instr, block) { + instr_to_msl(ctx, instr); + } + break; + } + case nir_cf_node_if: { + nir_if *ifnode = nir_cf_node_as_if(node); + P_IND(ctx, "if ("); + src_to_msl(ctx, &ifnode->condition); + P(ctx, ") {\n"); + ctx->indentlevel++; + foreach_list_typed(nir_cf_node, node, node, &ifnode->then_list) { + cf_node_to_metal(ctx, node); + } + ctx->indentlevel--; + if (!nir_cf_list_is_empty_block(&ifnode->else_list)) { + P_IND(ctx, "} else {\n"); + ctx->indentlevel++; + foreach_list_typed(nir_cf_node, node, node, &ifnode->else_list) { + cf_node_to_metal(ctx, node); + } + ctx->indentlevel--; + } + P_IND(ctx, "}\n"); + break; + } + case nir_cf_node_loop: { + nir_loop *loop = nir_cf_node_as_loop(node); + assert(!nir_loop_has_continue_construct(loop)); + /* We need to loop to infinite since MSL compiler crashes if we have + something like (simplified 
version): + * // clang-format off + * while (true) { + * if (some_conditional) { + * break_loop = true; + * } else { + * break_loop = false; + * } + * if (break_loop) { + * break; + * } + * } + * // clang-format on + * The issue I believe is that some_conditional wouldn't change the value + * no matter in which iteration we are (something like fetching the same + * value from a buffer) and the MSL compiler doesn't seem to like that + * much to the point it crashes. + * With this for loop now, we trick the MSL compiler into believing we are + * not doing an infinite loop (wink wink) + */ + P_IND(ctx, + "for (uint64_t no_crash = 0u; no_crash < %" PRIu64 + "; ++no_crash) {\n", + UINT64_MAX); + ctx->indentlevel++; + foreach_list_typed(nir_cf_node, node, node, &loop->body) { + cf_node_to_metal(ctx, node); + } + ctx->indentlevel--; + P_IND(ctx, "}\n"); + break; + } + case nir_cf_node_function: + assert(!"All functions are supposed to be inlined"); + } +} + +static void +emit_output_return(struct nir_to_msl_ctx *ctx, nir_shader *shader) +{ + if (shader->info.stage == MESA_SHADER_VERTEX || + shader->info.stage == MESA_SHADER_FRAGMENT) + P_IND(ctx, "return out;\n"); +} + +static void +rename_main_entrypoint(struct nir_shader *nir) +{ + /* Rename entrypoint to avoid MSL limitations after we've removed all others. + * We don't really care what it's named as long as it's not "main" + */ + const char *entrypoint_name = "main_entrypoint"; + nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir); + struct nir_function *function = entrypoint->function; + ralloc_free((void *)function->name); + function->name = ralloc_strdup(function, entrypoint_name); +} + +static bool +kk_scalarize_filter(const nir_instr *instr, const void *data) +{ + if (instr->type != nir_instr_type_alu) + return false; + return true; +} + +void +msl_preprocess_nir(struct nir_shader *nir) +{ + /* First, inline away all the functions */ + NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_function_temp); + NIR_PASS(_, nir, nir_lower_returns); + NIR_PASS(_, nir, nir_inline_functions); + NIR_PASS(_, nir, nir_opt_deref); + nir_remove_non_entrypoints(nir); + + NIR_PASS(_, nir, nir_lower_global_vars_to_local); + NIR_PASS(_, nir, nir_split_var_copies); + NIR_PASS(_, nir, nir_split_struct_vars, nir_var_function_temp); + NIR_PASS(_, nir, nir_split_array_vars, nir_var_function_temp); + NIR_PASS(_, nir, nir_split_per_member_structs); + NIR_PASS(_, nir, nir_lower_continue_constructs); + + NIR_PASS(_, nir, nir_lower_frexp); + + NIR_PASS(_, nir, nir_lower_vars_to_ssa); + NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL); + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + nir_input_attachment_options input_attachment_options = { + .use_fragcoord_sysval = true, + .use_layer_id_sysval = true, + }; + NIR_PASS(_, nir, nir_lower_input_attachments, &input_attachment_options); + } + NIR_PASS(_, nir, nir_opt_combine_barriers, NULL, NULL); + NIR_PASS(_, nir, nir_lower_var_copies); + NIR_PASS(_, nir, nir_split_var_copies); + + NIR_PASS(_, nir, nir_split_array_vars, + nir_var_function_temp | nir_var_shader_in | nir_var_shader_out); + NIR_PASS(_, nir, nir_lower_alu_to_scalar, kk_scalarize_filter, NULL); + + NIR_PASS(_, nir, nir_lower_indirect_derefs, + nir_var_shader_in | nir_var_shader_out, UINT32_MAX); + NIR_PASS(_, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 0, + glsl_get_natural_size_align_bytes, + glsl_get_natural_size_align_bytes); + + NIR_PASS(_, nir, nir_lower_system_values); + + 
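   /* Compute-only system values get a separate lowering pass below; the
    * options mirror what the driver reports to NIR: no base global invocation
    * id, but a base workgroup id (presumably so vkCmdDispatchBase-style group
    * offsets can be applied).
    */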
nir_lower_compute_system_values_options csv_options = { + .has_base_global_invocation_id = 0, + .has_base_workgroup_id = true, + }; + NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options); + + msl_nir_lower_subgroups(nir); +} + +bool +msl_optimize_nir(struct nir_shader *nir) +{ + bool progress; + NIR_PASS(_, nir, nir_lower_int64); + do { + progress = false; + + NIR_PASS(progress, nir, nir_split_var_copies); + NIR_PASS(progress, nir, nir_split_struct_vars, nir_var_function_temp); + NIR_PASS(progress, nir, nir_lower_var_copies); + NIR_PASS(progress, nir, nir_lower_vars_to_ssa); + NIR_PASS(progress, nir, nir_opt_undef); + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_opt_cse); + NIR_PASS(progress, nir, nir_opt_dead_cf); + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_deref); + NIR_PASS(progress, nir, nir_opt_constant_folding); + NIR_PASS(progress, nir, nir_opt_copy_prop_vars); + NIR_PASS(progress, nir, nir_opt_dead_write_vars); + NIR_PASS(progress, nir, nir_opt_combine_stores, nir_var_all); + NIR_PASS(progress, nir, nir_remove_dead_variables, nir_var_function_temp, + NULL); + NIR_PASS(progress, nir, nir_opt_algebraic); + NIR_PASS(progress, nir, nir_opt_if, 0); + NIR_PASS(progress, nir, nir_opt_remove_phis); + NIR_PASS(progress, nir, nir_opt_loop); + NIR_PASS(progress, nir, nir_lower_pack); + NIR_PASS(progress, nir, nir_lower_alu_to_scalar, kk_scalarize_filter, + NULL); + } while (progress); + NIR_PASS(_, nir, nir_lower_load_const_to_scalar); + NIR_PASS(_, nir, msl_nir_lower_algebraic_late); + NIR_PASS(_, nir, nir_convert_from_ssa, true, false); + nir_trivialize_registers(nir); + NIR_PASS(_, nir, nir_copy_prop); + + return progress; +} + +static void +msl_gather_info(struct nir_to_msl_ctx *ctx) +{ + nir_function_impl *impl = nir_shader_get_entrypoint(ctx->shader); + ctx->types = msl_infer_types(ctx->shader); + + /* TODO_KOSMICKRISP + * Reindex blocks and ssa. This allows us to optimize things we don't at the + * moment. */ + nir_index_blocks(impl); + nir_index_ssa_defs(impl); + + if (ctx->shader->info.stage == MESA_SHADER_VERTEX || + ctx->shader->info.stage == MESA_SHADER_FRAGMENT) { + msl_gather_io_info(ctx, ctx->inputs_info, ctx->outputs_info); + } +} + +static void +predeclare_ssa_values(struct nir_to_msl_ctx *ctx, nir_function_impl *impl) +{ + nir_foreach_block(block, impl) { + nir_foreach_instr(instr, block) { + nir_def *def; + switch (instr->type) { + case nir_instr_type_alu: { + nir_alu_instr *alu = nir_instr_as_alu(instr); + def = &alu->def; + break; + } + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (!instrinsic_needs_dest_type(intr)) + continue; + def = &intr->def; + break; + } + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + def = &tex->def; + break; + } + default: + continue; + } + const char *type = msl_type_for_def(ctx->types, def); + if (!type) + continue; + if (msl_def_is_sampler(ctx, def)) { + P_IND(ctx, "%s t%u;\n", type, def->index); + } else + P_IND(ctx, "%s t%u = %s(0);\n", type, def->index, type); + } + } +} + +char * +nir_to_msl(nir_shader *shader, void *mem_ctx) +{ + /* Need to rename the entrypoint here since hardcoded shaders used by vk_meta + * don't go through the preprocess step since we are the ones creating them. 
+ */ + rename_main_entrypoint(shader); + + struct nir_to_msl_ctx ctx = { + .shader = shader, + .text = _mesa_string_buffer_create(mem_ctx, 1024), + }; + nir_function_impl *impl = nir_shader_get_entrypoint(shader); + msl_gather_info(&ctx); + + P(&ctx, "// Generated by Mesa compiler\n"); + if (shader->info.stage == MESA_SHADER_COMPUTE) + P(&ctx, "#include <metal_compute>\n"); + P(&ctx, "#include <metal_stdlib>\n"); + P(&ctx, "using namespace metal;\n"); + + msl_emit_io_blocks(&ctx, shader); + if (shader->info.stage == MESA_SHADER_FRAGMENT && + shader->info.fs.early_fragment_tests) + P(&ctx, "[[early_fragment_tests]]\n"); + P(&ctx, "%s %s %s(\n", get_stage_string(shader->info.stage), + output_type(shader), get_entrypoint_name(shader)); + ctx.indentlevel++; + emit_sysvals(&ctx, shader); + emit_inputs(&ctx, shader); + ctx.indentlevel--; + P(&ctx, ")\n"); + P(&ctx, "{\n"); + ctx.indentlevel++; + msl_emit_output_var(&ctx, shader); + emit_local_vars(&ctx, shader); + predeclare_ssa_values(&ctx, impl); + foreach_list_typed(nir_cf_node, node, node, &impl->body) { + cf_node_to_metal(&ctx, node); + } + emit_output_return(&ctx, shader); + ctx.indentlevel--; + P(&ctx, "}\n"); + char *ret = ctx.text->buf; + ralloc_steal(mem_ctx, ctx.text->buf); + ralloc_free(ctx.text); + return ret; +} diff --git a/src/kosmickrisp/compiler/nir_to_msl.h b/src/kosmickrisp/compiler/nir_to_msl.h new file mode 100644 index 00000000000..0a4d50109ed --- /dev/null +++ b/src/kosmickrisp/compiler/nir_to_msl.h @@ -0,0 +1,56 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include "nir.h" + +enum pipe_format; + +/* Assumes nir_shader_gather_info has been called beforehand. */ +char *nir_to_msl(nir_shader *shader, void *mem_ctx); + +/* Call this after all API-specific lowerings. It will bring the NIR out of SSA + * at the end */ +bool msl_optimize_nir(struct nir_shader *nir); + +/* Call this before all API-speicific lowerings, it will */ +void msl_preprocess_nir(struct nir_shader *nir); + +enum msl_tex_access_flag { + MSL_ACCESS_SAMPLE = 0, + MSL_ACCESS_READ, + MSL_ACCESS_WRITE, + MSL_ACCESS_READ_WRITE, +}; + +static inline enum msl_tex_access_flag +msl_convert_access_flag(enum gl_access_qualifier qual) +{ + if (qual & ACCESS_NON_WRITEABLE) + return MSL_ACCESS_READ; + if (qual & ACCESS_NON_READABLE) + return MSL_ACCESS_WRITE; + return MSL_ACCESS_READ_WRITE; +} + +bool msl_nir_fs_force_output_signedness( + nir_shader *nir, enum pipe_format render_target_formats[MAX_DRAW_BUFFERS]); + +bool msl_nir_vs_remove_point_size_write(nir_builder *b, + nir_intrinsic_instr *intrin, + void *data); + +bool msl_nir_fs_remove_depth_write(nir_builder *b, nir_intrinsic_instr *intrin, + void *data); + +bool msl_lower_textures(nir_shader *s); + +bool msl_lower_static_sample_mask(nir_shader *nir, uint32_t sample_mask); +bool msl_ensure_depth_write(nir_shader *nir); +bool msl_ensure_vertex_position_output(nir_shader *nir); +bool msl_nir_sample_mask_type(nir_shader *nir); +bool msl_nir_layer_id_type(nir_shader *nir); diff --git a/src/kosmickrisp/kosmicomp.c b/src/kosmickrisp/kosmicomp.c new file mode 100644 index 00000000000..a45d8203c96 --- /dev/null +++ b/src/kosmickrisp/kosmicomp.c @@ -0,0 +1,187 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include <stdio.h> + +#include "compiler/nir_to_msl.h" +#include "spirv/nir_spirv.h" + +static int +load_spirv(const char *filename, uint32_t **words, size_t *nwords) +{ + const size_t CHUNK_SIZE = 4096; + uint32_t buf[CHUNK_SIZE]; + FILE *input = fopen(filename, "r"); + if (!input) { + fprintf(stderr, "Could not open file %s: %s\n", filename, + strerror(errno)); + return -1; + } + + *nwords = 0; + *words = malloc(CHUNK_SIZE * sizeof(buf[0])); + size_t read_size; + while (1) { + read_size = fread(buf, sizeof(buf[0]), CHUNK_SIZE, input); + if (read_size == 0) + break; + *words = realloc(*words, (*nwords + read_size) * sizeof(buf[0])); + memcpy(*words + *nwords, buf, sizeof(buf[0]) * read_size); + *nwords += read_size; + }; + + if (*words[0] != 0x07230203) { + fprintf(stderr, "%s is not a SPIR-V file?\n", filename); + return -1; + } + + return 0; +} + +static void +debug_callback(void *priv, enum nir_spirv_debug_level debuglevel, size_t offset, + const char *message) +{ + fprintf(stderr, "<%d> at %ld %s\n", debuglevel, offset, message); +} + +static int +type_size_vec4(const struct glsl_type *type, bool bindless) +{ + return glsl_count_attribute_slots(type, false); +} + +static void +shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align) +{ + assert(glsl_type_is_vector_or_scalar(type)); + + uint32_t comp_size = + glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8; + unsigned length = glsl_get_vector_elements(type); + *size = comp_size * length, *align = comp_size; +} + +static void +optimize(nir_shader *nir) +{ + msl_preprocess_nir(nir); + + NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const, + nir_address_format_32bit_offset); + NIR_PASS(_, nir, nir_lower_explicit_io, + nir_var_mem_global | nir_var_mem_ubo | nir_var_mem_ssbo, + nir_address_format_64bit_global); + if (nir->info.stage == MESA_SHADER_COMPUTE) { + if (!nir->info.shared_memory_explicit_layout) { + /* There may be garbage in shared_size, but it's the job of + * nir_lower_vars_to_explicit_types to allocate it. We have to reset to + * avoid overallocation. 
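          * (nir_lower_vars_to_explicit_types is expected to recompute
          * info.shared_size as it lays out the shared variables.)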
+ */ + nir->info.shared_size = 0; + + NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, + shared_var_info); + } + NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared, + nir_address_format_32bit_offset); + } + + NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, + type_size_vec4, (nir_lower_io_options)0); + + NIR_PASS(_, nir, nir_lower_variable_initializers, ~nir_var_function_temp); + NIR_PASS(_, nir, nir_remove_dead_variables, + nir_var_shader_in | nir_var_shader_out | nir_var_system_value, + NULL); + NIR_PASS(_, nir, nir_lower_io_vars_to_temporaries, + nir_shader_get_entrypoint(nir), true, false); + nir_lower_compute_system_values_options options = { + .has_base_global_invocation_id = 0, + }; + NIR_PASS(_, nir, nir_lower_system_values); + NIR_PASS(_, nir, nir_lower_compute_system_values, &options); + NIR_PASS(_, nir, nir_lower_global_vars_to_local); + NIR_PASS(_, nir, nir_lower_load_const_to_scalar); + + msl_optimize_nir(nir); +} + +static mesa_shader_stage +stage_from_filename(const char *filename) +{ + struct StageMapping { + char *name; + mesa_shader_stage stage; + }; + struct StageMapping stage_mappings[] = { + {.name = ".frag.", .stage = MESA_SHADER_FRAGMENT}, + {.name = ".vert.", .stage = MESA_SHADER_VERTEX}, + {.name = ".comp.", .stage = MESA_SHADER_COMPUTE}, + }; + for (int i = 0; i < ARRAY_SIZE(stage_mappings); i++) { + if (strstr(filename, stage_mappings[i].name)) + return stage_mappings[i].stage; + } + return MESA_SHADER_NONE; +} + +int +main(int argc, char **argv) +{ + if (argc != 2) { + fprintf(stderr, "Usage: kosmicomp filename.spv\n"); + return 1; + } + + // read file + size_t nwords = 0; + uint32_t *words = NULL; + int result = load_spirv(argv[1], &words, &nwords); + if (result == -1) { + return 2; + } + + // run spirv_to_nir + struct spirv_to_nir_options options = { + .environment = NIR_SPIRV_VULKAN, + .debug = + { + .func = &debug_callback, + .private_data = NULL, + }, + .ubo_addr_format = nir_address_format_64bit_global, + .ssbo_addr_format = nir_address_format_64bit_global, + .phys_ssbo_addr_format = nir_address_format_64bit_global, + }; + glsl_type_singleton_init_or_ref(); + struct nir_shader_compiler_options nir_options = { + .lower_fdph = 1, + }; + mesa_shader_stage stage = stage_from_filename(argv[1]); + if (stage == MESA_SHADER_NONE) { + fprintf(stderr, "Couldn't guess shader stage from %s\n", argv[1]); + return 4; + } + nir_shader *shader = spirv_to_nir(words, nwords, NULL, 0, stage, "main", + &options, &nir_options); + if (!shader) { + fprintf(stderr, "Compilation failed!\n"); + return 3; + } + // print nir + nir_print_shader(shader, stdout); + optimize(shader); + nir_print_shader(shader, stdout); + + char *msl_text = nir_to_msl(shader, shader); + + fputs(msl_text, stdout); + + ralloc_free(msl_text); + + return 0; +} diff --git a/src/kosmickrisp/meson.build b/src/kosmickrisp/meson.build new file mode 100644 index 00000000000..fe54329e132 --- /dev/null +++ b/src/kosmickrisp/meson.build @@ -0,0 +1,16 @@ +# Copyright 2025 LunarG, Inc. 
+# Copyright 2025 Google LLC +# SPDX-License-Identifier: MIT + +subdir('bridge') +subdir('compiler') +subdir('util') +subdir('vulkan') + +executable( + 'kosmicomp', + files('kosmicomp.c'), + dependencies : [idep_nir, idep_vtn, idep_vulkan_runtime_headers, idep_vulkan_util_headers], + link_with: [libkk], + link_args: ['-Wl,-undefined,dynamic_lookup'], +) diff --git a/src/kosmickrisp/util/kk_dispatch_trampolines_gen.py b/src/kosmickrisp/util/kk_dispatch_trampolines_gen.py new file mode 100644 index 00000000000..468f5679ea1 --- /dev/null +++ b/src/kosmickrisp/util/kk_dispatch_trampolines_gen.py @@ -0,0 +1,195 @@ +# coding=utf-8 +COPYRIGHT = """\ +/* + * Copyright 2020 Intel Corporation + * Copyright 2025 LunarG, Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +""" + +import argparse +import os + +from mako.template import Template + +# Mesa-local imports must be declared in meson variable +# '{file_without_suffix}_depend_files'. +from vk_entrypoints import get_entrypoints_from_xml + +TEMPLATE_H = Template(COPYRIGHT + """\ +/* This file generated from ${filename}, don't edit directly. */ + +#ifndef VK_DISPATCH_TRAMPOLINES_H +#define VK_DISPATCH_TRAMPOLINES_H + +#include "vk_dispatch_table.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern struct vk_physical_device_dispatch_table kk_physical_device_trampolines; +extern struct vk_device_dispatch_table kk_device_trampolines; + +#ifdef __cplusplus +} +#endif + +#endif /* VK_DISPATCH_TRAMPOLINES_H */ +""") + +TEMPLATE_C = Template(COPYRIGHT + """\ +/* This file generated from ${filename}, don't edit directly. 
*/ + +#include "kk_device.h" +#include "kk_dispatch_trampolines.h" +#include "vk_object.h" +#include "vk_physical_device.h" + +% for e in entrypoints: + % if not e.is_physical_device_entrypoint() or e.alias: + <% continue %> + % endif + % if e.guard is not None: +#ifdef ${e.guard} + % endif +static VKAPI_ATTR ${e.return_type} VKAPI_CALL +${e.prefixed_name('kk_tramp')}(${e.decl_params()}) +{ + <% assert e.params[0].type == 'VkPhysicalDevice' %> + VK_FROM_HANDLE(vk_physical_device, vk_physical_device, ${e.params[0].name}); + % if e.return_type == 'void': + vk_physical_device->dispatch_table.${e.name}(${e.call_params()}); + % else: + return vk_physical_device->dispatch_table.${e.name}(${e.call_params()}); + % endif +} + % if e.guard is not None: +#endif + % endif +% endfor + +struct vk_physical_device_dispatch_table kk_physical_device_trampolines = { +% for e in entrypoints: + % if not e.is_physical_device_entrypoint() or e.alias: + <% continue %> + % endif + % if e.guard is not None: +#ifdef ${e.guard} + % endif + .${e.name} = ${e.prefixed_name('kk_tramp')}, + % if e.guard is not None: +#endif + % endif +% endfor +}; + +% for e in entrypoints: + % if not e.is_device_entrypoint() or e.alias: + <% continue %> + % endif + % if e.guard is not None: +#ifdef ${e.guard} + % endif +static VKAPI_ATTR ${e.return_type} VKAPI_CALL +${e.prefixed_name('kk_tramp')}(${e.decl_params()}) +{ + % if e.params[0].type == 'VkDevice': + VK_FROM_HANDLE(kk_device, kk_device, ${e.params[0].name}); + % if e.return_type == 'void': + kk_device->exposed_dispatch_table.${e.name}(${e.call_params()}); + % else: + return kk_device->exposed_dispatch_table.${e.name}(${e.call_params()}); + % endif + % elif e.params[0].type in ('VkCommandBuffer', 'VkQueue'): + struct vk_object_base *vk_object = (struct vk_object_base *)${e.params[0].name}; + struct kk_device *kk_device = container_of(vk_object->device, struct kk_device, vk); + % if e.return_type == 'void': + kk_device->exposed_dispatch_table.${e.name}(${e.call_params()}); + % else: + return kk_device->exposed_dispatch_table.${e.name}(${e.call_params()}); + % endif + % else: + assert(!"Unhandled device child trampoline case: ${e.params[0].type}"); + % endif +} + % if e.guard is not None: +#endif + % endif +% endfor + +struct vk_device_dispatch_table kk_device_trampolines = { +% for e in entrypoints: + % if not e.is_device_entrypoint() or e.alias: + <% continue %> + % endif + % if e.guard is not None: +#ifdef ${e.guard} + % endif + .${e.name} = ${e.prefixed_name('kk_tramp')}, + % if e.guard is not None: +#endif + % endif +% endfor +}; +""") + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--out-c', help='Output C file.') + parser.add_argument('--out-h', help='Output H file.') + parser.add_argument('--beta', required=True, help='Enable beta extensions.') + parser.add_argument('--xml', + help='Vulkan API XML file.', + required=True, + action='append', + dest='xml_files') + args = parser.parse_args() + + entrypoints = get_entrypoints_from_xml(args.xml_files, args.beta) + + # For outputting entrypoints.h we generate a anv_EntryPoint() prototype + # per entry point. 
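    # To make TEMPLATE_C above concrete: for a device-child entrypoint whose
    # first parameter is a VkCommandBuffer (vkCmdDraw is used here purely as
    # an illustration), the generated trampoline should expand to roughly:
    #
    #    static VKAPI_ATTR void VKAPI_CALL
    #    kk_tramp_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount,
    #                     uint32_t instanceCount, uint32_t firstVertex,
    #                     uint32_t firstInstance)
    #    {
    #       struct vk_object_base *vk_object =
    #          (struct vk_object_base *)commandBuffer;
    #       struct kk_device *kk_device =
    #          container_of(vk_object->device, struct kk_device, vk);
    #       kk_device->exposed_dispatch_table.CmdDraw(commandBuffer,
    #                                                 vertexCount,
    #                                                 instanceCount,
    #                                                 firstVertex,
    #                                                 firstInstance);
    #    }
    #
    # plus a matching ".CmdDraw = kk_tramp_CmdDraw" entry in
    # kk_device_trampolines, so calls get rerouted through the device's
    # exposed_dispatch_table.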
+ try: + if args.out_h: + with open(args.out_h, 'w', encoding='utf-8') as f: + f.write(TEMPLATE_H.render(entrypoints=entrypoints, + filename=os.path.basename(__file__))) + if args.out_c: + with open(args.out_c, 'w', encoding='utf-8') as f: + f.write(TEMPLATE_C.render(entrypoints=entrypoints, + filename=os.path.basename(__file__))) + except Exception: + # In the event there's an error, this imports some helpers from mako + # to print a useful stack trace and prints it, then exits with + # status 1, if python is run with debug; otherwise it just raises + # the exception + if __debug__: + import sys + from mako import exceptions + sys.stderr.write(exceptions.text_error_template().render() + '\n') + sys.exit(1) + raise + + +if __name__ == '__main__': + main() diff --git a/src/kosmickrisp/util/meson.build b/src/kosmickrisp/util/meson.build new file mode 100644 index 00000000000..43316831cec --- /dev/null +++ b/src/kosmickrisp/util/meson.build @@ -0,0 +1,16 @@ +# Copyright © 2025 LunarG, Inc +# SPDX-License-Identifier: MIT + +kk_dispatch_trampolines_gen = files('kk_dispatch_trampolines_gen.py') + +kk_dispatch_trampolines = custom_target( + 'kk_dispatch_trampolines', + input : [kk_dispatch_trampolines_gen, vk_api_xml], + output : ['kk_dispatch_trampolines.c', 'kk_dispatch_trampolines.h'], + command : [ + prog_python, '@INPUT0@', '--xml', '@INPUT1@', + '--out-c', '@OUTPUT0@', '--out-h', '@OUTPUT1@', + '--beta', with_vulkan_beta.to_string() + ], + depend_files : vk_dispatch_trampolines_gen_depend_files, +) diff --git a/src/kosmickrisp/util/vk_entrypoints.py b/src/kosmickrisp/util/vk_entrypoints.py new file mode 100644 index 00000000000..a8280bae2ff --- /dev/null +++ b/src/kosmickrisp/util/vk_entrypoints.py @@ -0,0 +1,147 @@ +# Copyright 2020 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sub license, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice (including the +# next paragraph) shall be included in all copies or substantial portions +# of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +# IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR +# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import xml.etree.ElementTree as et + +from collections import OrderedDict, namedtuple + +# Mesa-local imports must be declared in meson variable +# '{file_without_suffix}_depend_files'. 
+from vk_extensions import get_all_required, filter_api + +EntrypointParam = namedtuple('EntrypointParam', 'type name decl len') + +class EntrypointBase: + def __init__(self, name): + assert name.startswith('vk') + self.name = name[2:] + self.alias = None + self.guard = None + self.entry_table_index = None + # Extensions which require this entrypoint + self.core_version = None + self.extensions = [] + + def prefixed_name(self, prefix): + return prefix + '_' + self.name + +class Entrypoint(EntrypointBase): + def __init__(self, name, return_type, params): + super(Entrypoint, self).__init__(name) + self.return_type = return_type + self.params = params + self.guard = None + self.aliases = [] + self.disp_table_index = None + + def is_physical_device_entrypoint(self): + return self.params[0].type in ('VkPhysicalDevice', ) + + def is_device_entrypoint(self): + return self.params[0].type in ('VkDevice', 'VkCommandBuffer', 'VkQueue') + + def decl_params(self, start=0): + return ', '.join(p.decl for p in self.params[start:]) + + def call_params(self, start=0): + return ', '.join(p.name for p in self.params[start:]) + +class EntrypointAlias(EntrypointBase): + def __init__(self, name, entrypoint): + super(EntrypointAlias, self).__init__(name) + self.alias = entrypoint + entrypoint.aliases.append(self) + + def is_physical_device_entrypoint(self): + return self.alias.is_physical_device_entrypoint() + + def is_device_entrypoint(self): + return self.alias.is_device_entrypoint() + + def prefixed_name(self, prefix): + return self.alias.prefixed_name(prefix) + + @property + def params(self): + return self.alias.params + + @property + def return_type(self): + return self.alias.return_type + + @property + def disp_table_index(self): + return self.alias.disp_table_index + + def decl_params(self): + return self.alias.decl_params() + + def call_params(self): + return self.alias.call_params() + +def get_entrypoints(doc, api, beta): + """Extract the entry points from the registry.""" + entrypoints = OrderedDict() + + required = get_all_required(doc, 'command', api, beta) + + for command in doc.findall('./commands/command'): + if not filter_api(command, api): + continue + + if 'alias' in command.attrib: + name = command.attrib['name'] + target = command.attrib['alias'] + e = EntrypointAlias(name, entrypoints[target]) + else: + name = command.find('./proto/name').text + ret_type = command.find('./proto/type').text + params = [EntrypointParam( + type=p.find('./type').text, + name=p.find('./name').text, + decl=''.join(p.itertext()), + len=p.attrib.get('altlen', p.attrib.get('len', None)) + ) for p in command.findall('./param') if filter_api(p, api)] + # They really need to be unique + e = Entrypoint(name, ret_type, params) + + if name not in required: + continue + + r = required[name] + e.core_version = r.core_version + e.extensions = r.extensions + e.guard = r.guard + + assert name not in entrypoints, name + entrypoints[name] = e + + return entrypoints.values() + +def get_entrypoints_from_xml(xml_files, beta, api='vulkan'): + entrypoints = [] + + for filename in xml_files: + doc = et.parse(filename) + entrypoints += get_entrypoints(doc, api, beta) + + return entrypoints diff --git a/src/kosmickrisp/util/vk_extensions.py b/src/kosmickrisp/util/vk_extensions.py new file mode 100644 index 00000000000..f58b04f71cb --- /dev/null +++ b/src/kosmickrisp/util/vk_extensions.py @@ -0,0 +1,371 @@ +import copy +import re +import xml.etree.ElementTree as et + +def get_api_list(s): + apis = [] + for a in s.split(','): + if a == 
'disabled': + continue + assert a in ('vulkan', 'vulkansc') + apis.append(a) + return apis + +class Extension: + def __init__(self, name, number, ext_version): + self.name = name + self.type = None + self.number = number + self.platform = None + self.provisional = False + self.ext_version = int(ext_version) + self.supported = [] + + def from_xml(ext_elem): + name = ext_elem.attrib['name'] + number = int(ext_elem.attrib['number']) + supported = get_api_list(ext_elem.attrib['supported']) + if name == 'VK_ANDROID_native_buffer': + assert not supported + supported = ['vulkan'] + + if not supported: + return Extension(name, number, 0) + + version = None + for enum_elem in ext_elem.findall('.require/enum'): + if enum_elem.attrib['name'].endswith('_SPEC_VERSION'): + # Skip alias SPEC_VERSIONs + if 'value' in enum_elem.attrib: + assert version is None + version = int(enum_elem.attrib['value']) + + assert version is not None + ext = Extension(name, number, version) + ext.type = ext_elem.attrib['type'] + ext.platform = ext_elem.attrib.get('platform', None) + ext.provisional = ext_elem.attrib.get('provisional', False) + ext.supported = supported + + return ext + + def c_android_condition(self): + # if it's an EXT or vendor extension, it's allowed + if not self.name.startswith(ANDROID_EXTENSION_WHITELIST_PREFIXES): + return 'true' + + allowed_version = ALLOWED_ANDROID_VERSION.get(self.name, None) + if allowed_version is None: + return 'false' + + return 'ANDROID_API_LEVEL >= %d' % (allowed_version) + +class ApiVersion: + def __init__(self, version): + self.version = version + +class VkVersion: + def __init__(self, string): + split = string.split('.') + self.major = int(split[0]) + self.minor = int(split[1]) + if len(split) > 2: + assert len(split) == 3 + self.patch = int(split[2]) + else: + self.patch = None + + # Sanity check. The range bits are required by the definition of the + # VK_MAKE_VERSION macro + assert self.major < 1024 and self.minor < 1024 + assert self.patch is None or self.patch < 4096 + assert str(self) == string + + def __str__(self): + ver_list = [str(self.major), str(self.minor)] + if self.patch is not None: + ver_list.append(str(self.patch)) + return '.'.join(ver_list) + + def c_vk_version(self): + ver_list = [str(self.major), str(self.minor), str(self.patch or 0)] + return 'VK_MAKE_VERSION(' + ', '.join(ver_list) + ')' + + def __int_ver(self): + # This is just an expansion of VK_VERSION + return (self.major << 22) | (self.minor << 12) | (self.patch or 0) + + def __gt__(self, other): + # If only one of them has a patch version, "ignore" it by making + # other's patch version match self. + if (self.patch is None) != (other.patch is None): + other = copy.copy(other) + other.patch = self.patch + + return self.__int_ver() > other.__int_ver() + +# Sort the extension list the way we expect: KHR, then EXT, then vendors +# alphabetically. For digits, read them as a whole number sort that. +# eg.: VK_KHR_8bit_storage < VK_KHR_16bit_storage < VK_EXT_acquire_xlib_display +def extension_order(ext): + order = [] + for substring in re.split('(KHR|EXT|[0-9]+)', ext.name): + if substring == 'KHR': + order.append(1) + if substring == 'EXT': + order.append(2) + elif substring.isdigit(): + order.append(int(substring)) + else: + order.append(substring) + return order + +def get_all_exts_from_xml(xml, api='vulkan'): + """ Get a list of all Vulkan extensions. 
""" + + xml = et.parse(xml) + + extensions = [] + for ext_elem in xml.findall('.extensions/extension'): + ext = Extension.from_xml(ext_elem) + if api in ext.supported: + extensions.append(ext) + + return sorted(extensions, key=extension_order) + +def init_exts_from_xml(xml, extensions, platform_defines): + """ Walk the Vulkan XML and fill out extra extension information. """ + + xml = et.parse(xml) + + ext_name_map = {} + for ext in extensions: + ext_name_map[ext.name] = ext + + # KHR_display is missing from the list. + platform_defines.append('VK_USE_PLATFORM_DISPLAY_KHR') + for platform in xml.findall('./platforms/platform'): + platform_defines.append(platform.attrib['protect']) + + for ext_elem in xml.findall('.extensions/extension'): + ext_name = ext_elem.attrib['name'] + if ext_name not in ext_name_map: + continue + + ext = ext_name_map[ext_name] + ext.type = ext_elem.attrib['type'] + +class Requirements: + def __init__(self, core_version=None): + self.core_version = core_version + self.extensions = [] + self.guard = None + + def add_extension(self, ext): + for e in self.extensions: + if e == ext: + return; + assert e.name != ext.name + + self.extensions.append(ext) + +def filter_api(elem, api): + if 'api' not in elem.attrib: + return True + + return api in elem.attrib['api'].split(',') + +def get_alias(aliases, name): + if name in aliases: + # in case the spec registry adds an alias chain later + return get_alias(aliases, aliases[name]) + return name + +def get_all_required(xml, thing, api, beta): + things = {} + aliases = {} + for struct in xml.findall('./types/type[@category="struct"][@alias]'): + if not filter_api(struct, api): + continue + + name = struct.attrib['name'] + alias = struct.attrib['alias'] + aliases[name] = alias + + for feature in xml.findall('./feature'): + if not filter_api(feature, api): + continue + + version = VkVersion(feature.attrib['number']) + for t in feature.findall('./require/' + thing): + name = t.attrib['name'] + assert name not in things + things[name] = Requirements(core_version=version) + + for extension in xml.findall('.extensions/extension'): + ext = Extension.from_xml(extension) + if api not in ext.supported: + continue + + if beta != 'true' and ext.provisional: + continue + + for require in extension.findall('./require'): + if not filter_api(require, api): + continue + + for t in require.findall('./' + thing): + name = get_alias(aliases, t.attrib['name']) + r = things.setdefault(name, Requirements()) + r.add_extension(ext) + + platform_defines = {} + for platform in xml.findall('./platforms/platform'): + name = platform.attrib['name'] + define = platform.attrib['protect'] + platform_defines[name] = define + + for req in things.values(): + if req.core_version is not None: + continue + + for ext in req.extensions: + if ext.platform in platform_defines: + req.guard = platform_defines[ext.platform] + break + + return things + +# Mapping between extension name and the android version in which the extension +# was whitelisted in Android CTS's dEQP-VK.info.device_extensions and +# dEQP-VK.api.info.android.no_unknown_extensions, excluding those blocked by +# android.graphics.cts.VulkanFeaturesTest#testVulkanBlockedExtensions. 
+ALLOWED_ANDROID_VERSION = { + # checkInstanceExtensions on oreo-cts-release + "VK_KHR_surface": 26, + "VK_KHR_display": 26, + "VK_KHR_android_surface": 26, + "VK_KHR_mir_surface": 26, + "VK_KHR_wayland_surface": 26, + "VK_KHR_win32_surface": 26, + "VK_KHR_xcb_surface": 26, + "VK_KHR_xlib_surface": 26, + "VK_KHR_get_physical_device_properties2": 26, + "VK_KHR_get_surface_capabilities2": 26, + "VK_KHR_external_memory_capabilities": 26, + "VK_KHR_external_semaphore_capabilities": 26, + "VK_KHR_external_fence_capabilities": 26, + # on pie-cts-release + "VK_KHR_device_group_creation": 28, + "VK_KHR_get_display_properties2": 28, + # on android10-tests-release + "VK_KHR_surface_protected_capabilities": 29, + # on android13-tests-release + "VK_KHR_portability_enumeration": 33, + + # checkDeviceExtensions on oreo-cts-release + "VK_KHR_swapchain": 26, + "VK_KHR_display_swapchain": 26, + "VK_KHR_sampler_mirror_clamp_to_edge": 26, + "VK_KHR_shader_draw_parameters": 26, + "VK_KHR_maintenance1": 26, + "VK_KHR_push_descriptor": 26, + "VK_KHR_descriptor_update_template": 26, + "VK_KHR_incremental_present": 26, + "VK_KHR_shared_presentable_image": 26, + "VK_KHR_storage_buffer_storage_class": 26, + "VK_KHR_16bit_storage": 26, + "VK_KHR_get_memory_requirements2": 26, + "VK_KHR_external_memory": 26, + "VK_KHR_external_memory_fd": 26, + "VK_KHR_external_memory_win32": 26, + "VK_KHR_external_semaphore": 26, + "VK_KHR_external_semaphore_fd": 26, + "VK_KHR_external_semaphore_win32": 26, + "VK_KHR_external_fence": 26, + "VK_KHR_external_fence_fd": 26, + "VK_KHR_external_fence_win32": 26, + "VK_KHR_win32_keyed_mutex": 26, + "VK_KHR_dedicated_allocation": 26, + "VK_KHR_variable_pointers": 26, + "VK_KHR_relaxed_block_layout": 26, + "VK_KHR_bind_memory2": 26, + "VK_KHR_maintenance2": 26, + "VK_KHR_image_format_list": 26, + "VK_KHR_sampler_ycbcr_conversion": 26, + # on oreo-mr1-cts-release + "VK_KHR_draw_indirect_count": 27, + # on pie-cts-release + "VK_KHR_device_group": 28, + "VK_KHR_multiview": 28, + "VK_KHR_maintenance3": 28, + "VK_KHR_create_renderpass2": 28, + "VK_KHR_driver_properties": 28, + # on android10-tests-release + "VK_KHR_shader_float_controls": 29, + "VK_KHR_shader_float16_int8": 29, + "VK_KHR_8bit_storage": 29, + "VK_KHR_depth_stencil_resolve": 29, + "VK_KHR_swapchain_mutable_format": 29, + "VK_KHR_shader_atomic_int64": 29, + "VK_KHR_vulkan_memory_model": 29, + "VK_KHR_swapchain_mutable_format": 29, + "VK_KHR_uniform_buffer_standard_layout": 29, + # on android11-tests-release + "VK_KHR_imageless_framebuffer": 30, + "VK_KHR_shader_subgroup_extended_types": 30, + "VK_KHR_buffer_device_address": 30, + "VK_KHR_separate_depth_stencil_layouts": 30, + "VK_KHR_timeline_semaphore": 30, + "VK_KHR_spirv_1_4": 30, + "VK_KHR_pipeline_executable_properties": 30, + "VK_KHR_shader_clock": 30, + # blocked by testVulkanBlockedExtensions + # "VK_KHR_performance_query": 30, + "VK_KHR_shader_non_semantic_info": 30, + "VK_KHR_copy_commands2": 30, + # on android12-tests-release + "VK_KHR_shader_terminate_invocation": 31, + "VK_KHR_ray_tracing_pipeline": 31, + "VK_KHR_ray_query": 31, + "VK_KHR_acceleration_structure": 31, + "VK_KHR_pipeline_library": 31, + "VK_KHR_deferred_host_operations": 31, + "VK_KHR_fragment_shading_rate": 31, + "VK_KHR_zero_initialize_workgroup_memory": 31, + "VK_KHR_workgroup_memory_explicit_layout": 31, + "VK_KHR_synchronization2": 31, + "VK_KHR_shader_integer_dot_product": 31, + # on android13-tests-release + "VK_KHR_dynamic_rendering": 33, + "VK_KHR_format_feature_flags2": 33, + 
"VK_KHR_global_priority": 33, + "VK_KHR_maintenance4": 33, + "VK_KHR_portability_subset": 33, + "VK_KHR_present_id": 33, + "VK_KHR_present_wait": 33, + "VK_KHR_shader_subgroup_uniform_control_flow": 33, + + # testNoUnknownExtensions on oreo-cts-release + "VK_GOOGLE_display_timing": 26, + # on pie-cts-release + "VK_ANDROID_external_memory_android_hardware_buffer": 28, + # on android11-tests-release + "VK_GOOGLE_decorate_string": 30, + "VK_GOOGLE_hlsl_functionality1": 30, + # on android13-tests-release + "VK_GOOGLE_surfaceless_query": 33, + + # this HAL extension is always allowed and will be filtered out by the + # loader + "VK_ANDROID_native_buffer": 26, +} + +# Extensions with these prefixes are checked in Android CTS, and thus must be +# whitelisted per the preceding dict. +ANDROID_EXTENSION_WHITELIST_PREFIXES = ( + "VK_KHX", + "VK_KHR", + "VK_GOOGLE", + "VK_ANDROID" +) diff --git a/src/kosmickrisp/vulkan/cl/kk_query.cl b/src/kosmickrisp/vulkan/cl/kk_query.cl new file mode 100644 index 00000000000..ef24ab8ab7b --- /dev/null +++ b/src/kosmickrisp/vulkan/cl/kk_query.cl @@ -0,0 +1,50 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright © 2024 Alyssa Rosenzweig + * Copyright © 2024 Valve Corporation + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ +#include "compiler/libcl/libcl_vk.h" + +#include "kk_query.h" + +void +libkk_write_u64(global struct libkk_imm_write *write_array) +{ + *write_array[cl_group_id.x].address = write_array[cl_group_id.x].value; +} + +void +libkk_copy_queries(global uint64_t *availability, global uint64_t *results, + global uint16_t *oq_index, uint64_t dst_addr, + uint64_t dst_stride, uint32_t first_query, + VkQueryResultFlagBits flags, uint16_t reports_per_query) +{ + uint index = cl_group_id.x; + uint64_t dst = dst_addr + (((uint64_t)index) * dst_stride); + uint32_t query = first_query + index; + + bool available; + if (availability) + available = availability[query]; + else + available = (results[query] != LIBKK_QUERY_UNAVAILABLE); + + if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT)) { + /* For occlusion queries, results[] points to the device global heap. We + * need to remap indices according to the query pool's allocation. + */ + uint result_index = oq_index ? oq_index[query] : query; + uint idx = result_index * reports_per_query; + + for (unsigned i = 0; i < reports_per_query; ++i) { + vk_write_query(dst, i, flags, results[idx + i]); + } + } + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + vk_write_query(dst, reports_per_query, flags, available); + } +} diff --git a/src/kosmickrisp/vulkan/cl/kk_query.h b/src/kosmickrisp/vulkan/cl/kk_query.h new file mode 100644 index 00000000000..8b37d36bb74 --- /dev/null +++ b/src/kosmickrisp/vulkan/cl/kk_query.h @@ -0,0 +1,21 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright © 2024 Alyssa Rosenzweig + * Copyright © 2024 Valve Corporation + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ +#ifndef KK_QUERY_H +#define KK_QUERY_H + +#include "compiler/libcl/libcl.h" + +struct libkk_imm_write { + DEVICE(uint64_t) address; + uint64_t value; +}; + +#define LIBKK_QUERY_UNAVAILABLE (uint64_t)((int64_t)-1) + +#endif /* KK_QUERY_H */ diff --git a/src/kosmickrisp/vulkan/cl/kk_triangle_fan.cl b/src/kosmickrisp/vulkan/cl/kk_triangle_fan.cl new file mode 100644 index 00000000000..bc2e250d072 --- /dev/null +++ b/src/kosmickrisp/vulkan/cl/kk_triangle_fan.cl @@ -0,0 +1,283 @@ +/* + * Copyright 2023 Alyssa Rosenzweig + * Copyright 2023 Valve Corporation + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ +#include "compiler/libcl/libcl_vk.h" +#include "compiler/shader_enums.h" + +static uint +libkk_vertex_id_for_line_loop(uint prim, uint vert, uint num_prims) +{ + /* (0, 1), (1, 2), (2, 0) */ + if (prim == (num_prims - 1) && vert == 1) + return 0; + else + return prim + vert; +} + +/* Swap the two non-provoking vertices third vert in odd triangles. This + * generates a vertex ID list with a consistent winding order. + * + * With prim and flatshade_first, the map : [0, 1, 2] -> [0, 1, 2] is its own + * inverse. This lets us reuse it for both vertex fetch and transform feedback. + */ +static uint +libagx_map_vertex_in_tri_strip(uint prim, uint vert, bool flatshade_first) +{ + unsigned pv = flatshade_first ? 0 : 2; + + bool even = (prim & 1) == 0; + bool provoking = vert == pv; + + return (provoking || even) ? vert : ((3 - pv) - vert); +} + +static uint +libkk_vertex_id_for_tri_fan(uint prim, uint vert, bool flatshade_first) +{ + /* Vulkan spec section 20.1.7 gives (i + 1, i + 2, 0) for a provoking + * first. OpenGL instead wants (0, i + 1, i + 2) with a provoking last. + * Piglit clipflat expects us to switch between these orders depending on + * provoking vertex, to avoid trivializing the fan. + * + * Rotate accordingly. + */ + if (flatshade_first) { + vert = (vert == 2) ? 0 : (vert + 1); + } + + /* The simpler form assuming last is provoking. */ + return (vert == 0) ? 0 : prim + vert; +} + +static uint +libkk_vertex_id_for_tri_strip_adj(uint prim, uint vert, uint num_prims, + bool flatshade_first) +{ + /* See Vulkan spec section 20.1.11 "Triangle Strips With Adjancency". + * + * There are different cases for first/middle/last/only primitives and for + * odd/even primitives. Determine which case we're in. + */ + bool last = prim == (num_prims - 1); + bool first = prim == 0; + bool even = (prim & 1) == 0; + bool even_or_first = even || first; + + /* When the last vertex is provoking, we rotate the primitives + * accordingly. This seems required for OpenGL. + */ + if (!flatshade_first && !even_or_first) { + vert = (vert + 4u) % 6u; + } + + /* Offsets per the spec. The spec lists 6 cases with 6 offsets. Luckily, + * there are lots of patterns we can exploit, avoiding a full 6x6 LUT. + * + * Here we assume the first vertex is provoking, the Vulkan default. + */ + uint offsets[6] = { + 0, + first ? 1 : (even ? -2 : 3), + even_or_first ? 2 : 4, + last ? 5 : 6, + even_or_first ? 4 : 2, + even_or_first ? 
3 : -2, + }; + + /* Ensure NIR can see thru the local array */ + uint offset = 0; + for (uint i = 1; i < 6; ++i) { + if (i == vert) + offset = offsets[i]; + } + + /* Finally add to the base of the primitive */ + return (prim * 2) + offset; +} + +static uint +vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first, uint prim, + uint vert, uint num_prims) +{ + switch (mode) { + case MESA_PRIM_POINTS: + case MESA_PRIM_LINES: + case MESA_PRIM_TRIANGLES: + case MESA_PRIM_LINES_ADJACENCY: + case MESA_PRIM_TRIANGLES_ADJACENCY: + /* Regular primitive: every N vertices defines a primitive */ + return (prim * mesa_vertices_per_prim(mode)) + vert; + + case MESA_PRIM_LINE_LOOP: + return libkk_vertex_id_for_line_loop(prim, vert, num_prims); + + case MESA_PRIM_LINE_STRIP: + case MESA_PRIM_LINE_STRIP_ADJACENCY: + /* (i, i + 1) or (i, ..., i + 3) */ + return prim + vert; + + case MESA_PRIM_TRIANGLE_STRIP: { + /* Order depends on the provoking vert. + * + * First: (0, 1, 2), (1, 3, 2), (2, 3, 4). + * Last: (0, 1, 2), (2, 1, 3), (2, 3, 4). + * + * Pull the (maybe swapped) vert from the corresponding primitive + */ + return prim + libagx_map_vertex_in_tri_strip(prim, vert, flatshade_first); + } + + case MESA_PRIM_TRIANGLE_FAN: + return libkk_vertex_id_for_tri_fan(prim, vert, flatshade_first); + + case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY: + return libkk_vertex_id_for_tri_strip_adj(prim, vert, num_prims, + flatshade_first); + + default: + return 0; + } +} + +static void +store_index(global uint8_t *index_buffer, uint index_size_B, uint id, + uint value) +{ + global uint32_t *out_32 = (global uint32_t *)index_buffer; + global uint16_t *out_16 = (global uint16_t *)index_buffer; + global uint8_t *out_8 = (global uint8_t *)index_buffer; + + if (index_size_B == 4) + out_32[id] = value; + else if (index_size_B == 2) + out_16[id] = value; + else + out_8[id] = value; +} + +static uint +load_index(constant uint8_t *index_buffer, uint32_t index_buffer_range_el, + uint id, uint index_size) +{ + /* We have no index buffer, index is the id */ + if (index_buffer == 0u) + return id; + + /* When no index_buffer is present, index_buffer_range_el is vtx count */ + bool oob = id >= index_buffer_range_el; + + /* If the load would be out-of-bounds, load the first element which is + * assumed valid. If the application index buffer is empty with robustness2, + * index_buffer will point to a zero sink where only the first is valid. + */ + if (oob) { + id = 0u; + } + + uint el; + if (index_size == 1) { + el = ((constant uint8_t *)index_buffer)[id]; + } else if (index_size == 2) { + el = ((constant uint16_t *)index_buffer)[id]; + } else { + el = ((constant uint32_t *)index_buffer)[id]; + } + + /* D3D robustness semantics. TODO: Optimize? */ + if (oob) { + el = 0; + } + + return el; +} + +/* + * Return the ID of the first thread in the workgroup where cond is true, or + * 1024 if cond is false across the workgroup. + */ +static uint +first_true_thread_in_workgroup(bool cond, local uint *scratch) +{ + barrier(CLK_LOCAL_MEM_FENCE); + scratch[get_sub_group_id()] = sub_group_ballot(cond)[0]; + barrier(CLK_LOCAL_MEM_FENCE); + + uint first_group = + ctz(sub_group_ballot(scratch[get_sub_group_local_id()])[0]); + uint off = ctz(first_group < 32 ? 
scratch[first_group] : 0); + return (first_group * 32) + off; +} + +// TODO_KOSMICKRISP +// KERNEL(1024) +void +libkk_unroll_geometry_and_restart( + constant uint8_t *index_buffer, global uint8_t *out_ptr, + constant uint32_t *in_draw, global uint32_t *out_draw, + uint32_t restart_index, uint32_t index_buffer_size_el, uint32_t in_el_size_B, + uint32_t out_el_size_B, uint32_t flatshade_first, uint32_t mode) +{ + uint tid = cl_local_id.x; + uint count = in_draw[0]; + + constant uint8_t *in_ptr = + index_buffer ? index_buffer + (in_draw[2] * in_el_size_B) : index_buffer; + + // local uint scratch[32]; + + uint out_prims = 0; + uint needle = 0; + uint per_prim = mesa_vertices_per_prim(mode); + while (needle < count) { + /* Search for next restart or the end. Lanes load in parallel. */ + uint next_restart = needle; + for (;;) { + uint idx = next_restart + tid; + bool restart = + idx >= count || load_index(in_ptr, index_buffer_size_el, idx, + in_el_size_B) == restart_index; + + // uint next_offs = first_true_thread_in_workgroup(restart, scratch); + + // next_restart += next_offs; + // if (next_offs < 1024) + // break; + if (restart) + break; + next_restart++; + } + + /* Emit up to the next restart. Lanes output in parallel */ + uint subcount = next_restart - needle; + uint subprims = u_decomposed_prims_for_vertices(mode, subcount); + uint out_prims_base = out_prims; + for (uint i = tid; i < subprims; /*i += 1024*/ ++i) { + for (uint vtx = 0; vtx < per_prim; ++vtx) { + uint id = + vertex_id_for_topology(mode, flatshade_first, i, vtx, subprims); + uint offset = needle + id; + + uint x = ((out_prims_base + i) * per_prim) + vtx; + uint y = + load_index(in_ptr, index_buffer_size_el, offset, in_el_size_B); + + store_index(out_ptr, out_el_size_B, x, y); + } + } + + out_prims += subprims; + needle = next_restart + 1; + } + + if (tid == 0) { + out_draw[0] = out_prims * per_prim; /* indexCount */ + out_draw[1] = in_draw[1]; /* instanceCount */ + out_draw[2] = 0u; /* firstIndex */ + out_draw[3] = index_buffer ? in_draw[3] : in_draw[2]; /* vertexOffset */ + out_draw[4] = index_buffer ? in_draw[4] : in_draw[3]; /* firstInstance */ + } +} diff --git a/src/kosmickrisp/vulkan/kk_bo.c b/src/kosmickrisp/vulkan/kk_bo.c new file mode 100644 index 00000000000..708b93fb2ed --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_bo.c @@ -0,0 +1,70 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_bo.h" + +#include "kk_device.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" + +#include "util/u_memory.h" + +VkResult +kk_alloc_bo(struct kk_device *dev, struct vk_object_base *log_obj, + uint64_t size_B, uint64_t align_B, struct kk_bo **bo_out) +{ + VkResult result = VK_SUCCESS; + + // TODO_KOSMICKRISP: Probably requires handling the buffer maximum 256MB + uint64_t minimum_alignment = 0u; + mtl_heap_buffer_size_and_align_with_length(dev->mtl_handle, &size_B, + &minimum_alignment); + minimum_alignment = MAX2(minimum_alignment, align_B); + size_B = align64(size_B, minimum_alignment); + mtl_heap *handle = + mtl_new_heap(dev->mtl_handle, size_B, KK_MTL_RESOURCE_OPTIONS); + if (handle == NULL) { + result = vk_errorf(log_obj, VK_ERROR_OUT_OF_DEVICE_MEMORY, "%m"); + goto fail_heap; + } + + mtl_buffer *map = mtl_new_buffer_with_length(handle, size_B, 0u); + if (map == NULL) { + result = vk_errorf(log_obj, VK_ERROR_OUT_OF_DEVICE_MEMORY, "%m"); + goto fail_map; + } + + struct kk_bo *bo = CALLOC_STRUCT(kk_bo); + + if (bo == NULL) { + result = vk_errorf(log_obj, VK_ERROR_OUT_OF_HOST_MEMORY, "%m"); + goto fail_alloc; + } + + bo->mtl_handle = handle; + bo->size_B = size_B; + bo->map = map; + bo->gpu = mtl_buffer_get_gpu_address(map); + bo->cpu = mtl_get_contents(map); + + *bo_out = bo; + return result; + +fail_alloc: + mtl_release(map); +fail_map: + mtl_release(handle); +fail_heap: + return result; +} + +void +kk_destroy_bo(struct kk_device *dev, struct kk_bo *bo) +{ + mtl_release(bo->map); + mtl_release(bo->mtl_handle); + FREE(bo); +} diff --git a/src/kosmickrisp/vulkan/kk_bo.h b/src/kosmickrisp/vulkan/kk_bo.h new file mode 100644 index 00000000000..5a97723720b --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_bo.h @@ -0,0 +1,32 @@ +/* + * Copyright © 2025 LunarG, Inc + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_BO_H +#define KK_BO_H 1 + +#include "kosmickrisp/bridge/mtl_types.h" + +#include "vulkan/vulkan_core.h" + +#include <inttypes.h> + +struct kk_device; +struct vk_object_base; + +struct kk_bo { + mtl_heap *mtl_handle; + mtl_buffer *map; + uint64_t size_B; + uint64_t gpu; // GPU address + void *cpu; // CPU address +}; + +VkResult kk_alloc_bo(struct kk_device *dev, struct vk_object_base *log_obj, + uint64_t size_B, uint64_t align_B, struct kk_bo **bo_out); + +void kk_destroy_bo(struct kk_device *dev, struct kk_bo *bo); + +#endif /* KK_BO_H */ diff --git a/src/kosmickrisp/vulkan/kk_buffer.c b/src/kosmickrisp/vulkan/kk_buffer.c new file mode 100644 index 00000000000..75c10ab86f9 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_buffer.c @@ -0,0 +1,209 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_buffer.h" + +#include "kk_device.h" +#include "kk_entrypoints.h" +#include "kk_physical_device.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" + +static uint64_t +kk_get_buffer_alignment(const struct kk_physical_device *pdev, uint64_t size, + VkBufferUsageFlags2KHR usage_flags, + VkBufferCreateFlags create_flags) +{ + uint64_t alignment; + mtl_heap_buffer_size_and_align_with_length(pdev->mtl_dev_handle, &size, + &alignment); + + /** TODO_KOSMICKRISP Metal requires that texel buffers be aligned to the + * format they'll use. Since we won't be able to know the format until the + * view is created, we should align to the worst case scenario. 
For this, we + * need to request all supported format alignments and take the largest one. + */ + return alignment; +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_CreateBuffer(VkDevice device, const VkBufferCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer) +{ + VK_FROM_HANDLE(kk_device, dev, device); + struct kk_buffer *buffer; + + if (pCreateInfo->size > KK_MAX_BUFFER_SIZE) + return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); + + buffer = + vk_buffer_create(&dev->vk, pCreateInfo, pAllocator, sizeof(*buffer)); + if (!buffer) + return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + + *pBuffer = kk_buffer_to_handle(buffer); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +kk_DestroyBuffer(VkDevice device, VkBuffer _buffer, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VK_FROM_HANDLE(kk_buffer, buffer, _buffer); + + if (!buffer) + return; + + if (buffer->mtl_handle) + mtl_release(buffer->mtl_handle); + + vk_buffer_destroy(&dev->vk, pAllocator, &buffer->vk); +} + +VKAPI_ATTR void VKAPI_CALL +kk_GetDeviceBufferMemoryRequirements( + VkDevice device, const VkDeviceBufferMemoryRequirements *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) +{ + VK_FROM_HANDLE(kk_device, dev, device); + struct kk_physical_device *pdev = kk_device_physical(dev); + + const uint64_t alignment = kk_get_buffer_alignment( + pdev, pInfo->pCreateInfo->size, pInfo->pCreateInfo->usage, + pInfo->pCreateInfo->flags); + + pMemoryRequirements->memoryRequirements = (VkMemoryRequirements){ + .size = align64(pInfo->pCreateInfo->size, alignment), + .alignment = alignment, + .memoryTypeBits = BITFIELD_MASK(pdev->mem_type_count), + }; + + vk_foreach_struct_const(ext, pMemoryRequirements->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { + VkMemoryDedicatedRequirements *dedicated = (void *)ext; + dedicated->prefersDedicatedAllocation = false; + dedicated->requiresDedicatedAllocation = false; + break; + } + default: + vk_debug_ignored_stype(ext->sType); + break; + } + } +} + +VKAPI_ATTR void VKAPI_CALL +kk_GetPhysicalDeviceExternalBufferProperties( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo, + VkExternalBufferProperties *pExternalBufferProperties) +{ + /* The Vulkan 1.3.256 spec says: + * + * VUID-VkPhysicalDeviceExternalBufferInfo-handleType-parameter + * + * "handleType must be a valid VkExternalMemoryHandleTypeFlagBits value" + * + * This differs from VkPhysicalDeviceExternalImageFormatInfo, which + * surprisingly permits handleType == 0. + */ + assert(pExternalBufferInfo->handleType != 0); + + /* All of the current flags are for sparse which we don't support yet. + * Even when we do support it, doing sparse on external memory sounds + * sketchy. Also, just disallowing flags is the safe option. + */ + if (pExternalBufferInfo->flags) + goto unsupported; + + switch (pExternalBufferInfo->handleType) { + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLHEAP_BIT_EXT: + pExternalBufferProperties->externalMemoryProperties = + kk_mtlheap_mem_props; + return; + default: + goto unsupported; + } + +unsupported: + /* From the Vulkan 1.3.256 spec: + * + * compatibleHandleTypes must include at least handleType. 
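    *
    * Hence the fallback below fills in only compatibleHandleTypes with the
    * requested handleType; exportFromImportedHandleTypes and
    * externalMemoryFeatures stay zero-initialized, marking the handle type as
    * neither exportable nor importable.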
+ */ + pExternalBufferProperties->externalMemoryProperties = + (VkExternalMemoryProperties){ + .compatibleHandleTypes = pExternalBufferInfo->handleType, + }; +} + +static VkResult +kk_bind_buffer_memory(struct kk_device *dev, const VkBindBufferMemoryInfo *info) +{ + // Do the actual memory binding + VK_FROM_HANDLE(kk_device_memory, mem, info->memory); + VK_FROM_HANDLE(kk_buffer, buffer, info->buffer); + + buffer->mtl_handle = mtl_new_buffer_with_length( + mem->bo->mtl_handle, buffer->vk.size, info->memoryOffset); + buffer->vk.device_address = mtl_buffer_get_gpu_address(buffer->mtl_handle); + /* We need Metal to give us a CPU mapping so it correctly captures the + * data in the GPU debugger... */ + mtl_get_contents(buffer->mtl_handle); + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_BindBufferMemory2(VkDevice device, uint32_t bindInfoCount, + const VkBindBufferMemoryInfo *pBindInfos) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VkResult first_error_or_success = VK_SUCCESS; + + for (uint32_t i = 0; i < bindInfoCount; ++i) { + VkResult result = kk_bind_buffer_memory(dev, &pBindInfos[i]); + + const VkBindMemoryStatusKHR *status = + vk_find_struct_const(pBindInfos[i].pNext, BIND_MEMORY_STATUS_KHR); + if (status != NULL && status->pResult != NULL) + *status->pResult = result; + + if (first_error_or_success == VK_SUCCESS) + first_error_or_success = result; + } + + return first_error_or_success; +} + +VKAPI_ATTR VkDeviceAddress VKAPI_CALL +kk_GetBufferDeviceAddress(UNUSED VkDevice device, + const VkBufferDeviceAddressInfo *pInfo) +{ + VK_FROM_HANDLE(kk_buffer, buffer, pInfo->buffer); + + return vk_buffer_address(&buffer->vk, 0); +} + +VKAPI_ATTR uint64_t VKAPI_CALL +kk_GetBufferOpaqueCaptureAddress(UNUSED VkDevice device, + const VkBufferDeviceAddressInfo *pInfo) +{ + VK_FROM_HANDLE(kk_buffer, buffer, pInfo->buffer); + + return vk_buffer_address(&buffer->vk, 0); +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_GetBufferOpaqueCaptureDescriptorDataEXT( + VkDevice device, const VkBufferCaptureDescriptorDataInfoEXT *pInfo, + void *pData) +{ + return VK_SUCCESS; +} diff --git a/src/kosmickrisp/vulkan/kk_buffer.h b/src/kosmickrisp/vulkan/kk_buffer.h new file mode 100644 index 00000000000..1d9744678b6 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_buffer.h @@ -0,0 +1,48 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_BUFFER_H +#define KK_BUFFER_H 1 + +#include "kk_device_memory.h" +#include "kk_private.h" + +#include "kosmickrisp/bridge/mtl_types.h" + +#include "vk_buffer.h" + +struct kk_buffer { + struct vk_buffer vk; + mtl_buffer *mtl_handle; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(kk_buffer, vk.base, VkBuffer, + VK_OBJECT_TYPE_BUFFER) + +static inline struct kk_addr_range +kk_buffer_addr_range(const struct kk_buffer *buffer, uint64_t offset, + uint64_t range) +{ + if (buffer == NULL) + return (struct kk_addr_range){.range = 0}; + + return (struct kk_addr_range){ + .addr = vk_buffer_address(&buffer->vk, offset), + .range = vk_buffer_range(&buffer->vk, offset, range), + }; +} + +static inline mtl_resource * +kk_buffer_to_mtl_resource(const struct kk_buffer *buffer) +{ + if (buffer != NULL) { + return (mtl_resource *)buffer->mtl_handle; + } + return NULL; +} + +#endif // KK_BUFFER_H diff --git a/src/kosmickrisp/vulkan/kk_buffer_view.c b/src/kosmickrisp/vulkan/kk_buffer_view.c new file mode 100644 index 00000000000..0ee011f2f73 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_buffer_view.c @@ -0,0 +1,124 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_buffer_view.h" + +#include "kk_buffer.h" +#include "kk_device.h" +#include "kk_entrypoints.h" +#include "kk_format.h" +#include "kk_image_layout.h" +#include "kk_nir_lower_vbo.h" +#include "kk_physical_device.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" +#include "kosmickrisp/bridge/mtl_format.h" + +#include "vk_format.h" + +VkFormatFeatureFlags2 +kk_get_buffer_format_features(struct kk_physical_device *pdev, + VkFormat vk_format) +{ + VkFormatFeatureFlags2 features = 0; + enum pipe_format p_format = vk_format_to_pipe_format(vk_format); + + if (p_format == PIPE_FORMAT_NONE) + return 0; + + const struct kk_va_format *format = kk_get_va_format(p_format); + if (format) { + if (format->texel_buffer.read) + features |= VK_FORMAT_FEATURE_2_UNIFORM_TEXEL_BUFFER_BIT; + + if (format->texel_buffer.write) + features |= VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT; + + /* Only these formats allow atomics for texel buffers */ + if (vk_format == VK_FORMAT_R32_UINT || vk_format == VK_FORMAT_R32_SINT) + features |= VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; + } + + if (kk_vbo_supports_format(p_format)) + features |= VK_FORMAT_FEATURE_2_VERTEX_BUFFER_BIT; + + return features; +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_CreateBufferView(VkDevice _device, const VkBufferViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkBufferView *pBufferView) +{ + VK_FROM_HANDLE(kk_device, dev, _device); + struct kk_buffer_view *view = + vk_buffer_view_create(&dev->vk, pCreateInfo, pAllocator, sizeof(*view)); + if (!view) + return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + + enum pipe_format p_format = vk_format_to_pipe_format(view->vk.format); + const struct kk_va_format *supported_format = kk_get_va_format(p_format); + + /* If we reached here, we support reading at least */ + enum mtl_texture_usage usage = MTL_TEXTURE_USAGE_SHADER_READ; + if (supported_format->texel_buffer.write) + usage |= MTL_TEXTURE_USAGE_SHADER_WRITE; + + /* Only these formats allow atomics for texel buffers */ + if (view->vk.format == VK_FORMAT_R32_UINT || + view->vk.format == VK_FORMAT_R32_SINT) + usage |= MTL_TEXTURE_USAGE_SHADER_ATOMIC; + + struct kk_image_layout layout = { 
+ .width_px = view->vk.elements, + .height_px = 1u, + .depth_px = 1u, + .layers = 1u, + .type = MTL_TEXTURE_TYPE_TEXTURE_BUFFER, + .sample_count_sa = 1u, + .levels = 1u, + .optimized_layout = false, + .usage = usage, + .format = {.pipe = p_format, .mtl = supported_format->mtl_pixel_format}, + .swizzle = + { + .red = supported_format->swizzle.red, + .green = supported_format->swizzle.green, + .blue = supported_format->swizzle.blue, + .alpha = supported_format->swizzle.alpha, + }, + .linear_stride_B = view->vk.range, + }; + struct kk_buffer *buffer = + container_of(view->vk.buffer, struct kk_buffer, vk); + view->mtl_texel_buffer_handle = mtl_new_texture_with_descriptor_linear( + buffer->mtl_handle, &layout, view->vk.offset); + if (!view->mtl_texel_buffer_handle) { + vk_buffer_view_destroy(&dev->vk, pAllocator, &view->vk); + return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); + } + view->texel_buffer_gpu_id = + mtl_texture_get_gpu_resource_id(view->mtl_texel_buffer_handle); + + *pBufferView = kk_buffer_view_to_handle(view); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +kk_DestroyBufferView(VkDevice _device, VkBufferView bufferView, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(kk_device, dev, _device); + VK_FROM_HANDLE(kk_buffer_view, view, bufferView); + + if (!view) + return; + + mtl_release(view->mtl_texel_buffer_handle); + vk_buffer_view_destroy(&dev->vk, pAllocator, &view->vk); +} diff --git a/src/kosmickrisp/vulkan/kk_buffer_view.h b/src/kosmickrisp/vulkan/kk_buffer_view.h new file mode 100644 index 00000000000..8525e50b760 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_buffer_view.h @@ -0,0 +1,31 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_BUFFER_VIEW_H +#define KK_BUFFER_VIEW_H 1 + +#include "kk_private.h" + +#include "kosmickrisp/bridge/mtl_types.h" + +#include "vk_buffer_view.h" + +struct kk_physical_device; + +VkFormatFeatureFlags2 +kk_get_buffer_format_features(struct kk_physical_device *pdev, VkFormat format); + +struct kk_buffer_view { + struct vk_buffer_view vk; + mtl_texture *mtl_texel_buffer_handle; + uint64_t texel_buffer_gpu_id; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(kk_buffer_view, vk.base, VkBufferView, + VK_OBJECT_TYPE_BUFFER_VIEW) + +#endif diff --git a/src/kosmickrisp/vulkan/kk_cmd_buffer.c b/src/kosmickrisp/vulkan/kk_cmd_buffer.c new file mode 100644 index 00000000000..c4366012a8f --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_cmd_buffer.c @@ -0,0 +1,533 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_cmd_buffer.h" + +#include "kk_buffer.h" +#include "kk_cmd_pool.h" +#include "kk_descriptor_set_layout.h" +#include "kk_encoder.h" +#include "kk_entrypoints.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" + +#include "vk_alloc.h" +#include "vk_pipeline_layout.h" + +static void +kk_descriptor_state_fini(struct kk_cmd_buffer *cmd, + struct kk_descriptor_state *desc) +{ + struct kk_cmd_pool *pool = kk_cmd_buffer_pool(cmd); + + for (unsigned i = 0; i < KK_MAX_SETS; i++) { + vk_free(&pool->vk.alloc, desc->push[i]); + desc->push[i] = NULL; + desc->sets[i] = NULL; /* We also need to set sets to NULL so state doesn't + propagate if we reset it */ + desc->sets_not_resident = 0u; + } +} + +void +kk_cmd_release_resources(struct kk_device *dev, struct kk_cmd_buffer *cmd) +{ + kk_cmd_release_dynamic_ds_state(cmd); + kk_descriptor_state_fini(cmd, &cmd->state.gfx.descriptors); + kk_descriptor_state_fini(cmd, &cmd->state.cs.descriptors); + + /* Release all BOs used as descriptor buffers for submissions */ + util_dynarray_foreach(&cmd->large_bos, struct kk_bo *, bo) { + kk_destroy_bo(dev, *bo); + } + util_dynarray_clear(&cmd->large_bos); +} + +static void +kk_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer) +{ + struct kk_cmd_buffer *cmd = + container_of(vk_cmd_buffer, struct kk_cmd_buffer, vk); + struct kk_cmd_pool *pool = kk_cmd_buffer_pool(cmd); + + vk_command_buffer_finish(&cmd->vk); + struct kk_device *dev = kk_cmd_buffer_device(cmd); + + kk_cmd_release_resources(dev, cmd); + + vk_free(&pool->vk.alloc, cmd); +} + +static VkResult +kk_create_cmd_buffer(struct vk_command_pool *vk_pool, + VkCommandBufferLevel level, + struct vk_command_buffer **cmd_buffer_out) +{ + struct kk_cmd_pool *pool = container_of(vk_pool, struct kk_cmd_pool, vk); + struct kk_device *dev = kk_cmd_pool_device(pool); + struct kk_cmd_buffer *cmd; + VkResult result; + + cmd = vk_zalloc(&pool->vk.alloc, sizeof(*cmd), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (cmd == NULL) + return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + + result = + vk_command_buffer_init(&pool->vk, &cmd->vk, &kk_cmd_buffer_ops, level); + if (result != VK_SUCCESS) { + vk_free(&pool->vk.alloc, cmd); + return result; + } + + util_dynarray_init(&cmd->large_bos, NULL); + + cmd->vk.dynamic_graphics_state.vi = &cmd->state.gfx._dynamic_vi; + cmd->vk.dynamic_graphics_state.ms.sample_locations = + &cmd->state.gfx._dynamic_sl; + + *cmd_buffer_out = &cmd->vk; + + return VK_SUCCESS; +} + +static void +kk_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, + UNUSED VkCommandBufferResetFlags flags) +{ + struct kk_cmd_buffer *cmd = + container_of(vk_cmd_buffer, struct kk_cmd_buffer, vk); + struct kk_device *dev = kk_cmd_buffer_device(cmd); + + vk_command_buffer_reset(&cmd->vk); + kk_cmd_release_resources(dev, cmd); +} + +const struct vk_command_buffer_ops kk_cmd_buffer_ops = { + .create = kk_create_cmd_buffer, + .reset = kk_reset_cmd_buffer, + .destroy = kk_destroy_cmd_buffer, +}; + +VKAPI_ATTR VkResult VKAPI_CALL +kk_BeginCommandBuffer(VkCommandBuffer commandBuffer, + const VkCommandBufferBeginInfo *pBeginInfo) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + + kk_reset_cmd_buffer(&cmd->vk, 0u); + vk_command_buffer_begin(&cmd->vk, pBeginInfo); + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_EndCommandBuffer(VkCommandBuffer commandBuffer) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + + return vk_command_buffer_end(&cmd->vk); +} + +static 
bool +kk_can_ignore_barrier(VkAccessFlags2 access, VkPipelineStageFlags2 stage) +{ + if (access == VK_ACCESS_2_NONE || stage == VK_PIPELINE_STAGE_2_NONE) + return true; + + const VkAccessFlags2 ignore_access = + VK_ACCESS_2_HOST_READ_BIT | VK_ACCESS_2_HOST_WRITE_BIT; + const VkPipelineStageFlags2 ignore_stage = VK_PIPELINE_STAGE_2_HOST_BIT; + return (!(access ^ ignore_access)) || (!(stage ^ ignore_stage)); +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, + const VkDependencyInfo *pDependencyInfo) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + enum kk_encoder_type last_used = cmd->encoder->main.last_used; + kk_encoder_signal_fence_and_end(cmd); + + /* If we were inside a render pass, restart it loading attachments */ + if (last_used == KK_ENC_RENDER) { + struct kk_graphics_state *state = &cmd->state.gfx; + assert(state->render_pass_descriptor); + kk_encoder_start_render(cmd, state->render_pass_descriptor, + state->render.view_mask); + kk_cmd_buffer_dirty_all_gfx(cmd); + } +} + +static void +kk_bind_descriptor_sets(struct kk_descriptor_state *desc, + const VkBindDescriptorSetsInfoKHR *info) +{ + VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout); + + /* From the Vulkan 1.3.275 spec: + * + * "When binding a descriptor set (see Descriptor Set Binding) to + * set number N... + * + * If, additionally, the previously bound descriptor set for set + * N was bound using a pipeline layout not compatible for set N, + * then all bindings in sets numbered greater than N are + * disturbed." + * + * This means that, if some earlier set gets bound in such a way that + * it changes set_dynamic_buffer_start[s], this binding is implicitly + * invalidated. Therefore, we can always look at the current value + * of set_dynamic_buffer_start[s] as the base of our dynamic buffer + * range and it's only our responsibility to adjust all + * set_dynamic_buffer_start[p] for p > s as needed. 
+ */ + uint8_t dyn_buffer_start = + desc->root.set_dynamic_buffer_start[info->firstSet]; + + uint32_t next_dyn_offset = 0; + for (uint32_t i = 0; i < info->descriptorSetCount; ++i) { + unsigned s = i + info->firstSet; + VK_FROM_HANDLE(kk_descriptor_set, set, info->pDescriptorSets[i]); + + if (desc->sets[s] != set) { + if (set != NULL) { + desc->root.sets[s] = set->addr; + desc->set_sizes[s] = set->size; + } else { + desc->root.sets[s] = 0; + desc->set_sizes[s] = 0; + } + desc->sets[s] = set; + + desc->sets_not_resident |= BITFIELD_BIT(s); + + /* Binding descriptors invalidates push descriptors */ + desc->push_dirty &= ~BITFIELD_BIT(s); + } + + if (pipeline_layout->set_layouts[s] != NULL) { + const struct kk_descriptor_set_layout *set_layout = + vk_to_kk_descriptor_set_layout(pipeline_layout->set_layouts[s]); + + if (set != NULL && set_layout->dynamic_buffer_count > 0) { + for (uint32_t j = 0; j < set_layout->dynamic_buffer_count; j++) { + struct kk_buffer_address addr = set->dynamic_buffers[j]; + addr.base_addr += info->pDynamicOffsets[next_dyn_offset + j]; + desc->root.dynamic_buffers[dyn_buffer_start + j] = addr; + } + next_dyn_offset += set->layout->dynamic_buffer_count; + } + + dyn_buffer_start += set_layout->dynamic_buffer_count; + } else { + assert(set == NULL); + } + } + assert(dyn_buffer_start <= KK_MAX_DYNAMIC_BUFFERS); + assert(next_dyn_offset <= info->dynamicOffsetCount); + + for (uint32_t s = info->firstSet + info->descriptorSetCount; s < KK_MAX_SETS; + s++) + desc->root.set_dynamic_buffer_start[s] = dyn_buffer_start; + + desc->root_dirty = true; +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdBindDescriptorSets2KHR( + VkCommandBuffer commandBuffer, + const VkBindDescriptorSetsInfoKHR *pBindDescriptorSetsInfo) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + + if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) { + kk_bind_descriptor_sets(&cmd->state.gfx.descriptors, + pBindDescriptorSetsInfo); + } + + if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) { + kk_bind_descriptor_sets(&cmd->state.cs.descriptors, + pBindDescriptorSetsInfo); + } +} + +static struct kk_push_descriptor_set * +kk_cmd_push_descriptors(struct kk_cmd_buffer *cmd, + struct kk_descriptor_state *desc, + struct kk_descriptor_set_layout *set_layout, + uint32_t set) +{ + struct kk_device *dev = kk_cmd_buffer_device(cmd); + assert(set < KK_MAX_SETS); + if (unlikely(desc->push[set] == NULL)) { + size_t size = sizeof(*desc->push[set]) + + (sizeof(mtl_resource *) * set_layout->descriptor_count); + desc->push[set] = vk_zalloc(&cmd->vk.pool->alloc, size, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (unlikely(desc->push[set] == NULL)) { + vk_command_buffer_set_error(&cmd->vk, VK_ERROR_OUT_OF_HOST_MEMORY); + return NULL; + } + desc->push[set]->layout = set_layout; + for (uint32_t i = 0u; i < set_layout->descriptor_count; ++i) + desc->push[set]->mtl_resources[i] = dev->null_descriptor->map; + } + + /* Pushing descriptors replaces whatever sets are bound */ + desc->sets[set] = NULL; + desc->push_dirty |= BITFIELD_BIT(set); + desc->sets_not_resident |= BITFIELD_BIT(set); + + return desc->push[set]; +} + +static void +kk_push_descriptor_set(struct kk_cmd_buffer *cmd, + struct kk_descriptor_state *desc, + const VkPushDescriptorSetInfoKHR *info) +{ + VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, info->layout); + + struct kk_descriptor_set_layout *set_layout = + vk_to_kk_descriptor_set_layout(pipeline_layout->set_layouts[info->set]); + + struct kk_push_descriptor_set *push_set = + 
kk_cmd_push_descriptors(cmd, desc, set_layout, info->set); + if (unlikely(push_set == NULL)) + return; + + kk_push_descriptor_set_update(push_set, info->descriptorWriteCount, + info->pDescriptorWrites); +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdPushDescriptorSet2KHR( + VkCommandBuffer commandBuffer, + const VkPushDescriptorSetInfoKHR *pPushDescriptorSetInfo) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + + if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) { + kk_push_descriptor_set(cmd, &cmd->state.gfx.descriptors, + pPushDescriptorSetInfo); + } + + if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) { + kk_push_descriptor_set(cmd, &cmd->state.cs.descriptors, + pPushDescriptorSetInfo); + } +} + +static void +kk_push_constants(UNUSED struct kk_cmd_buffer *cmd, + struct kk_descriptor_state *desc, + const VkPushConstantsInfoKHR *info) +{ + memcpy(desc->root.push + info->offset, info->pValues, info->size); + desc->root_dirty = true; +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdPushConstants2KHR(VkCommandBuffer commandBuffer, + const VkPushConstantsInfoKHR *pPushConstantsInfo) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + + if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) + kk_push_constants(cmd, &cmd->state.gfx.descriptors, pPushConstantsInfo); + + if (pPushConstantsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) + kk_push_constants(cmd, &cmd->state.cs.descriptors, pPushConstantsInfo); +} + +void +kk_cmd_buffer_write_descriptor_buffer(struct kk_cmd_buffer *cmd, + struct kk_descriptor_state *desc, + size_t size, size_t offset) +{ + assert(size + offset <= sizeof(desc->root.sets)); + + struct kk_bo *root_buffer = desc->root.root_buffer; + + memcpy(root_buffer->cpu, (uint8_t *)desc->root.sets + offset, size); +} + +void +kk_cmd_release_dynamic_ds_state(struct kk_cmd_buffer *cmd) +{ + if (cmd->state.gfx.is_depth_stencil_dynamic && + cmd->state.gfx.depth_stencil_state) + mtl_release(cmd->state.gfx.depth_stencil_state); + cmd->state.gfx.depth_stencil_state = NULL; +} + +struct kk_bo * +kk_cmd_allocate_buffer(struct kk_cmd_buffer *cmd, size_t size_B, + size_t alignment_B) +{ + struct kk_bo *buffer = NULL; + + VkResult result = kk_alloc_bo(kk_cmd_buffer_device(cmd), &cmd->vk.base, + size_B, alignment_B, &buffer); + if (result != VK_SUCCESS) { + vk_command_buffer_set_error(&cmd->vk, result); + return NULL; + } + util_dynarray_append(&cmd->large_bos, struct kk_bo *, buffer); + + return buffer; +} + +struct kk_pool +kk_pool_upload(struct kk_cmd_buffer *cmd, void *data, size_t size_B, + size_t alignment_B) +{ + struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, size_B, alignment_B); + if (!bo) + return (struct kk_pool){}; + + memcpy(bo->cpu, data, size_B); + struct kk_pool pool = {.handle = bo->map, .gpu = bo->gpu, .cpu = bo->cpu}; + + return pool; +} + +uint64_t +kk_upload_descriptor_root(struct kk_cmd_buffer *cmd, + VkPipelineBindPoint bind_point) +{ + struct kk_descriptor_state *desc = kk_get_descriptors_state(cmd, bind_point); + struct kk_root_descriptor_table *root = &desc->root; + struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, sizeof(*root), 8u); + if (bo == NULL) + return 0u; + + memcpy(bo->cpu, root, sizeof(*root)); + root->root_buffer = bo; + + return bo->gpu; +} + +void +kk_cmd_buffer_flush_push_descriptors(struct kk_cmd_buffer *cmd, + struct kk_descriptor_state *desc) +{ + u_foreach_bit(set_idx, desc->push_dirty) { + struct kk_push_descriptor_set *push_set = desc->push[set_idx]; + struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, 
sizeof(push_set->data), + KK_MIN_UBO_ALIGNMENT); + if (bo == NULL) + return; + + memcpy(bo->cpu, push_set->data, sizeof(push_set->data)); + push_set->mtl_descriptor_buffer = bo->map; + desc->root.sets[set_idx] = bo->gpu; + desc->set_sizes[set_idx] = sizeof(push_set->data); + } + + desc->root_dirty = true; + desc->push_dirty = 0; +} + +static void +kk_make_graphics_descriptor_resources_resident(struct kk_cmd_buffer *cmd) +{ + struct kk_descriptor_state *desc = &cmd->state.gfx.descriptors; + mtl_render_encoder *encoder = kk_render_encoder(cmd); + /* Make resources resident as required by Metal */ + u_foreach_bit(set_index, desc->sets_not_resident) { + mtl_resource *descriptor_buffer = NULL; + + /* If we have no set, it means it was a push set */ + if (desc->sets[set_index]) { + struct kk_descriptor_set *set = desc->sets[set_index]; + descriptor_buffer = set->mtl_descriptor_buffer; + } else { + struct kk_push_descriptor_set *push_set = desc->push[set_index]; + descriptor_buffer = push_set->mtl_descriptor_buffer; + } + + /* We could have empty descriptor sets for some reason... */ + if (descriptor_buffer) { + mtl_render_use_resource(encoder, descriptor_buffer, + MTL_RESOURCE_USAGE_READ); + } + } + + desc->sets_not_resident = 0u; +} + +static void +kk_make_compute_descriptor_resources_resident(struct kk_cmd_buffer *cmd) +{ + struct kk_descriptor_state *desc = &cmd->state.cs.descriptors; + mtl_compute_encoder *encoder = kk_compute_encoder(cmd); + u_foreach_bit(set_index, desc->sets_not_resident) { + /* Make resources resident as required by Metal */ + mtl_resource *descriptor_buffer = NULL; + if (desc->sets[set_index]) { + struct kk_descriptor_set *set = desc->sets[set_index]; + descriptor_buffer = set->mtl_descriptor_buffer; + } else { + struct kk_push_descriptor_set *push_set = desc->push[set_index]; + descriptor_buffer = push_set->mtl_descriptor_buffer; + } + + /* We could have empty descriptor sets for some reason... 
*/ + if (descriptor_buffer) { + mtl_compute_use_resource(encoder, descriptor_buffer, + MTL_RESOURCE_USAGE_READ); + } + } + + desc->sets_not_resident = 0u; +} + +void +kk_make_descriptor_resources_resident(struct kk_cmd_buffer *cmd, + VkPipelineBindPoint bind_point) +{ + if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) + kk_make_graphics_descriptor_resources_resident(cmd); + else if (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) + kk_make_compute_descriptor_resources_resident(cmd); +} + +void +kk_cmd_write(struct kk_cmd_buffer *cmd, mtl_buffer *buffer, uint64_t addr, + uint64_t value) +{ + util_dynarray_append(&cmd->encoder->imm_writes, uint64_t, addr); + util_dynarray_append(&cmd->encoder->imm_writes, uint64_t, value); + util_dynarray_append(&cmd->encoder->resident_buffers, mtl_buffer *, buffer); +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdPushDescriptorSetWithTemplate2KHR( + VkCommandBuffer commandBuffer, const VkPushDescriptorSetWithTemplateInfoKHR + *pPushDescriptorSetWithTemplateInfo) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(vk_descriptor_update_template, template, + pPushDescriptorSetWithTemplateInfo->descriptorUpdateTemplate); + VK_FROM_HANDLE(vk_pipeline_layout, pipeline_layout, + pPushDescriptorSetWithTemplateInfo->layout); + + struct kk_descriptor_state *desc = + kk_get_descriptors_state(cmd, template->bind_point); + struct kk_descriptor_set_layout *set_layout = vk_to_kk_descriptor_set_layout( + pipeline_layout->set_layouts[pPushDescriptorSetWithTemplateInfo->set]); + struct kk_push_descriptor_set *push_set = kk_cmd_push_descriptors( + cmd, desc, set_layout, pPushDescriptorSetWithTemplateInfo->set); + if (unlikely(push_set == NULL)) + return; + + kk_push_descriptor_set_update_template( + push_set, set_layout, template, + pPushDescriptorSetWithTemplateInfo->pData); +} diff --git a/src/kosmickrisp/vulkan/kk_cmd_buffer.h b/src/kosmickrisp/vulkan/kk_cmd_buffer.h new file mode 100644 index 00000000000..83d91f4e0b2 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_cmd_buffer.h @@ -0,0 +1,270 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_CMD_BUFFER_H +#define KK_CMD_BUFFER_H 1 + +#include "kk_private.h" + +#include "kk_descriptor_set.h" +#include "kk_image.h" +#include "kk_nir_lower_vbo.h" +#include "kk_shader.h" + +#include "kosmickrisp/bridge/mtl_types.h" + +#include "util/u_dynarray.h" + +#include "vk_command_buffer.h" + +#include <stdio.h> + +struct kk_query_pool; + +struct kk_root_descriptor_table { + struct kk_bo *root_buffer; + + union { + struct { + /* Vertex input state */ + uint32_t buffer_strides[KK_MAX_VBUFS]; + uint64_t attrib_base[KK_MAX_ATTRIBS]; + uint32_t attrib_clamps[KK_MAX_ATTRIBS]; + float blend_constant[4]; + } draw; + struct { + uint32_t base_group[3]; + } cs; + }; + + /* Client push constants */ + uint8_t push[KK_MAX_PUSH_SIZE]; + + /* Descriptor set base addresses */ + uint64_t sets[KK_MAX_SETS]; + + /* Dynamic buffer bindings */ + struct kk_buffer_address dynamic_buffers[KK_MAX_DYNAMIC_BUFFERS]; + + /* Start index in dynamic_buffers where each set starts */ + uint8_t set_dynamic_buffer_start[KK_MAX_SETS]; +}; + +struct kk_descriptor_state { + bool root_dirty; + struct kk_root_descriptor_table root; + + uint32_t set_sizes[KK_MAX_SETS]; + struct kk_descriptor_set *sets[KK_MAX_SETS]; + mtl_resource **resources[KK_MAX_SETS]; + /* Non resident sets can either be sets or push. 
If sets[index] == NULL, then + * push[index] != NULL */ + uint32_t sets_not_resident; + + uint32_t push_dirty; + struct kk_push_descriptor_set *push[KK_MAX_SETS]; +}; + +struct kk_attachment { + VkFormat vk_format; + struct kk_image_view *iview; + + VkResolveModeFlagBits resolve_mode; + struct kk_image_view *resolve_iview; + + /* Needed to track the value of storeOp in case we need to copy images for + * the DRM_FORMAT_MOD_LINEAR case */ + VkAttachmentStoreOp store_op; +}; + +struct kk_rendering_state { + VkRenderingFlagBits flags; + + VkRect2D area; + uint32_t layer_count; + uint32_t view_mask; + uint32_t samples; + + uint32_t color_att_count; + struct kk_attachment color_att[KK_MAX_RTS]; + struct kk_attachment depth_att; + struct kk_attachment stencil_att; + struct kk_attachment fsr_att; +}; + +/* Dirty tracking bits for state not tracked by vk_dynamic_graphics_state or + * shaders_dirty. + */ +enum kk_dirty { + KK_DIRTY_INDEX = BITFIELD_BIT(0), + KK_DIRTY_VB = BITFIELD_BIT(1), + KK_DIRTY_OCCLUSION = BITFIELD_BIT(2), + KK_DIRTY_PROVOKING = BITFIELD_BIT(3), + KK_DIRTY_VARYINGS = BITFIELD_BIT(4), + KK_DIRTY_PIPELINE = BITFIELD_BIT(5), +}; + +struct kk_graphics_state { + struct kk_rendering_state render; + struct kk_descriptor_state descriptors; + + mtl_render_pipeline_state *pipeline_state; + mtl_depth_stencil_state *depth_stencil_state; + mtl_render_pass_descriptor *render_pass_descriptor; + bool is_depth_stencil_dynamic; + bool is_cull_front_and_back; + bool restart_disabled; + + enum mtl_primitive_type primitive_type; + enum mesa_prim prim; + enum kk_dirty dirty; + + struct { + enum mtl_visibility_result_mode mode; + + /* If enabled, index of the current occlusion query in the occlusion heap. + * There can only be one active at a time (hardware constraint). 
+ */ + uint16_t index; + } occlusion; + + /* Index buffer */ + struct { + mtl_buffer *handle; + uint32_t size; + uint32_t offset; + uint32_t restart; + uint8_t bytes_per_index; + } index; + + /* Vertex buffers */ + struct { + struct kk_addr_range addr_range[KK_MAX_VBUFS]; + mtl_buffer *handles[KK_MAX_VBUFS]; + uint32_t attribs_read; + /* Required to understand maximum size of index buffer if primitive is + * triangle fans */ + uint32_t max_vertices; + } vb; + + /* Needed by vk_command_buffer::dynamic_graphics_state */ + struct vk_vertex_input_state _dynamic_vi; + struct vk_sample_locations_state _dynamic_sl; +}; + +struct kk_compute_state { + struct kk_descriptor_state descriptors; + mtl_compute_pipeline_state *pipeline_state; + struct mtl_size local_size; + enum kk_dirty dirty; +}; + +struct kk_encoder; + +struct kk_cmd_buffer { + struct vk_command_buffer vk; + + struct kk_encoder *encoder; + void *drawable; + + struct { + struct kk_graphics_state gfx; + struct kk_compute_state cs; + } state; + + /* Owned large BOs */ + struct util_dynarray large_bos; +}; + +VK_DEFINE_HANDLE_CASTS(kk_cmd_buffer, vk.base, VkCommandBuffer, + VK_OBJECT_TYPE_COMMAND_BUFFER) + +extern const struct vk_command_buffer_ops kk_cmd_buffer_ops; + +static inline struct kk_device * +kk_cmd_buffer_device(struct kk_cmd_buffer *cmd) +{ + return (struct kk_device *)cmd->vk.base.device; +} + +static inline struct kk_cmd_pool * +kk_cmd_buffer_pool(struct kk_cmd_buffer *cmd) +{ + return (struct kk_cmd_pool *)cmd->vk.pool; +} + +static inline struct kk_descriptor_state * +kk_get_descriptors_state(struct kk_cmd_buffer *cmd, + VkPipelineBindPoint bind_point) +{ + switch (bind_point) { + case VK_PIPELINE_BIND_POINT_GRAPHICS: + return &cmd->state.gfx.descriptors; + case VK_PIPELINE_BIND_POINT_COMPUTE: + return &cmd->state.cs.descriptors; + default: + UNREACHABLE("Unhandled bind point"); + } +}; + +void kk_cmd_release_resources(struct kk_device *dev, struct kk_cmd_buffer *cmd); + +static void +kk_cmd_buffer_dirty_all_gfx(struct kk_cmd_buffer *cmd) +{ + /* Ensure we flush all graphics state */ + vk_dynamic_graphics_state_dirty_all(&cmd->vk.dynamic_graphics_state); + cmd->state.gfx.dirty = ~0u; +} + +void kk_cmd_release_dynamic_ds_state(struct kk_cmd_buffer *cmd); + +mtl_depth_stencil_state * +kk_compile_depth_stencil_state(struct kk_device *device, + const struct vk_depth_stencil_state *ds, + bool has_depth, bool has_stencil); + +void kk_meta_resolve_rendering(struct kk_cmd_buffer *cmd, + const VkRenderingInfo *pRenderingInfo); + +void kk_cmd_buffer_write_descriptor_buffer(struct kk_cmd_buffer *cmd, + struct kk_descriptor_state *desc, + size_t size, size_t offset); + +/* Allocates temporary buffer that will be released once the command buffer has + * completed */ +struct kk_bo *kk_cmd_allocate_buffer(struct kk_cmd_buffer *cmd, size_t size_B, + size_t alignment_B); + +struct kk_pool { + mtl_buffer *handle; + uint64_t gpu; + void *cpu; +}; +struct kk_pool kk_pool_upload(struct kk_cmd_buffer *cmd, void *data, + size_t size_B, size_t alignment_B); + +uint64_t kk_upload_descriptor_root(struct kk_cmd_buffer *cmd, + VkPipelineBindPoint bind_point); + +void kk_cmd_buffer_flush_push_descriptors(struct kk_cmd_buffer *cmd, + struct kk_descriptor_state *desc); + +void kk_make_descriptor_resources_resident(struct kk_cmd_buffer *cmd, + VkPipelineBindPoint bind_point); + +void kk_cmd_write(struct kk_cmd_buffer *cmd, mtl_buffer *buffer, uint64_t addr, + uint64_t value); + +void kk_cmd_dispatch_pipeline(struct kk_cmd_buffer *cmd, + 
mtl_compute_encoder *encoder, + mtl_compute_pipeline_state *pipeline, + const void *push_data, size_t push_size, + uint32_t groupCountX, uint32_t groupCountY, + uint32_t groupCountZ); + +#endif diff --git a/src/kosmickrisp/vulkan/kk_cmd_clear.c b/src/kosmickrisp/vulkan/kk_cmd_clear.c new file mode 100644 index 00000000000..2f5e418d1ae --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_cmd_clear.c @@ -0,0 +1,169 @@ +/* + * Copyright 2024 Valve Corporation + * Copyright 2024 Alyssa Rosenzweig + * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ +#include "kk_cmd_buffer.h" + +#include "kk_device.h" +#include "kk_entrypoints.h" +#include "kk_image.h" +#include "kk_image_view.h" +#include "kk_physical_device.h" + +#include "vk_format.h" +#include "vk_meta.h" + +static VkImageViewType +render_view_type(VkImageType image_type, unsigned layer_count) +{ + switch (image_type) { + case VK_IMAGE_TYPE_1D: + return layer_count == 1 ? VK_IMAGE_VIEW_TYPE_1D + : VK_IMAGE_VIEW_TYPE_1D_ARRAY; + case VK_IMAGE_TYPE_2D: + return layer_count == 1 ? VK_IMAGE_VIEW_TYPE_2D + : VK_IMAGE_VIEW_TYPE_2D_ARRAY; + case VK_IMAGE_TYPE_3D: + return VK_IMAGE_VIEW_TYPE_3D; + default: + UNREACHABLE("Invalid image type"); + } +} + +static void +clear_image(struct kk_cmd_buffer *cmd, struct kk_image *image, + VkImageLayout image_layout, VkFormat format, + const VkClearValue *clear_value, uint32_t range_count, + const VkImageSubresourceRange *ranges) +{ + struct kk_device *dev = kk_cmd_buffer_device(cmd); + ASSERTED VkResult result; + + for (uint32_t r = 0; r < range_count; r++) { + const uint32_t level_count = + vk_image_subresource_level_count(&image->vk, &ranges[r]); + + for (uint32_t l = 0; l < level_count; l++) { + const uint32_t level = ranges[r].baseMipLevel + l; + + const VkExtent3D level_extent = + vk_image_mip_level_extent(&image->vk, level); + + uint32_t base_array_layer, layer_count; + if (image->vk.image_type == VK_IMAGE_TYPE_3D) { + base_array_layer = 0; + layer_count = level_extent.depth; + } else { + base_array_layer = ranges[r].baseArrayLayer; + layer_count = + vk_image_subresource_layer_count(&image->vk, &ranges[r]); + } + + const VkImageViewUsageCreateInfo view_usage_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, + .usage = (ranges[r].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) + ? 
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
+ : VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+ };
+ const VkImageViewCreateInfo view_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .flags = VK_IMAGE_VIEW_CREATE_DRIVER_INTERNAL_BIT_MESA,
+ .pNext = &view_usage_info,
+ .image = kk_image_to_handle(image),
+ .viewType = render_view_type(image->vk.image_type, layer_count),
+ .format = format,
+ .subresourceRange =
+ {
+ .aspectMask = image->vk.aspects,
+ .baseMipLevel = level,
+ .levelCount = 1,
+ .baseArrayLayer = base_array_layer,
+ .layerCount = layer_count,
+ },
+ };
+
+ /* We use vk_meta_create_image_view here for lifetime management */
+ VkImageView view;
+ result =
+ vk_meta_create_image_view(&cmd->vk, &dev->meta, &view_info, &view);
+ assert(result == VK_SUCCESS);
+
+ VkRenderingInfo render = {
+ .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
+ .renderArea =
+ {
+ .offset = {0, 0},
+ .extent = {level_extent.width, level_extent.height},
+ },
+ .layerCount = layer_count,
+ };
+
+ VkRenderingAttachmentInfo vk_att = {
+ .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
+ .imageView = view,
+ .imageLayout = image_layout,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .clearValue = *clear_value,
+ };
+
+ if (ranges[r].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+ render.colorAttachmentCount = 1;
+ render.pColorAttachments = &vk_att;
+ }
+ if (ranges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
+ render.pDepthAttachment = &vk_att;
+ if (ranges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
+ render.pStencilAttachment = &vk_att;
+
+ kk_CmdBeginRendering(kk_cmd_buffer_to_handle(cmd), &render);
+ kk_CmdEndRendering(kk_cmd_buffer_to_handle(cmd));
+ }
+ }
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage _image,
+ VkImageLayout imageLayout,
+ const VkClearColorValue *pColor, uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_image, image, _image);
+
+ VkClearValue clear_value = {
+ .color = *pColor,
+ };
+
+ VkFormat vk_format = image->vk.format;
+ if (vk_format == VK_FORMAT_R64_UINT || vk_format == VK_FORMAT_R64_SINT)
+ vk_format = VK_FORMAT_R32G32_UINT;
+
+ enum pipe_format p_format = vk_format_to_pipe_format(vk_format);
+ assert(p_format != PIPE_FORMAT_NONE);
+
+ clear_image(cmd, image, imageLayout, vk_format, &clear_value, rangeCount,
+ pRanges);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage _image,
+ VkImageLayout imageLayout,
+ const VkClearDepthStencilValue *pDepthStencil,
+ uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
+{
+ VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer);
+ VK_FROM_HANDLE(kk_image, image, _image);
+
+ const VkClearValue clear_value = {
+ .depthStencil = *pDepthStencil,
+ };
+
+ clear_image(cmd, image, imageLayout, image->vk.format, &clear_value,
+ rangeCount, pRanges);
+} diff --git a/src/kosmickrisp/vulkan/kk_cmd_copy.c b/src/kosmickrisp/vulkan/kk_cmd_copy.c new file mode 100644 index 00000000000..32b1b5af359 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_cmd_copy.c @@ -0,0 +1,355 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_cmd_buffer.h" + +#include "kk_bo.h" +#include "kk_buffer.h" +#include "kk_device.h" +#include "kk_encoder.h" +#include "kk_entrypoints.h" +#include "kk_physical_device.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" +#include "kosmickrisp/bridge/vk_to_mtl_map.h" + +#include "util/format/u_format.h" + +VKAPI_ATTR void VKAPI_CALL +kk_CmdCopyBuffer2(VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2 *pCopyBufferInfo) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(kk_buffer, src, pCopyBufferInfo->srcBuffer); + VK_FROM_HANDLE(kk_buffer, dst, pCopyBufferInfo->dstBuffer); + + mtl_blit_encoder *blit = kk_blit_encoder(cmd); + for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) { + const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[i]; + mtl_copy_from_buffer_to_buffer(blit, src->mtl_handle, region->srcOffset, + dst->mtl_handle, region->dstOffset, + region->size); + } +} + +struct kk_buffer_image_copy_info { + struct mtl_buffer_image_copy mtl_data; + size_t buffer_slice_size_B; +}; + +static struct kk_buffer_image_copy_info +vk_buffer_image_copy_to_mtl_buffer_image_copy( + const VkBufferImageCopy2 *region, const struct kk_image_plane *plane) +{ + struct kk_buffer_image_copy_info copy; + enum pipe_format p_format = plane->layout.format.pipe; + if (region->imageSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) { + copy.mtl_data.options = MTL_BLIT_OPTION_DEPTH_FROM_DEPTH_STENCIL; + p_format = util_format_get_depth_only(p_format); + } else if (region->imageSubresource.aspectMask == + VK_IMAGE_ASPECT_STENCIL_BIT) { + copy.mtl_data.options = MTL_BLIT_OPTION_STENCIL_FROM_DEPTH_STENCIL; + p_format = PIPE_FORMAT_S8_UINT; + } else + copy.mtl_data.options = MTL_BLIT_OPTION_NONE; + + const uint32_t buffer_width = region->bufferRowLength + ? region->bufferRowLength + : region->imageExtent.width; + const uint32_t buffer_height = region->bufferImageHeight + ? region->bufferImageHeight + : region->imageExtent.height; + + const uint32_t buffer_stride_B = + util_format_get_stride(p_format, buffer_width); + const uint32_t buffer_size_2d_B = + util_format_get_2d_size(p_format, buffer_stride_B, buffer_height); + + /* Metal requires this value to be 0 for 2D images, otherwise the number of + * bytes between each 2D image of a 3D texture */ + copy.mtl_data.buffer_2d_image_size_B = + plane->layout.depth_px == 1u ? 
0u : buffer_size_2d_B; + copy.mtl_data.buffer_stride_B = buffer_stride_B; + copy.mtl_data.image_size = vk_extent_3d_to_mtl_size(®ion->imageExtent); + copy.mtl_data.image_origin = + vk_offset_3d_to_mtl_origin(®ion->imageOffset); + copy.mtl_data.image_level = region->imageSubresource.mipLevel; + copy.buffer_slice_size_B = buffer_size_2d_B; + + return copy; +} + +#define kk_foreach_slice(ndx, image, subresource_member) \ + for (uint32_t ndx = region->subresource_member.baseArrayLayer; \ + ndx < (region->subresource_member.baseArrayLayer + \ + vk_image_subresource_layer_count(&image->vk, \ + ®ion->subresource_member)); \ + ++ndx) + +VKAPI_ATTR void VKAPI_CALL +kk_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(kk_buffer, buffer, pCopyBufferToImageInfo->srcBuffer); + VK_FROM_HANDLE(kk_image, image, pCopyBufferToImageInfo->dstImage); + + mtl_blit_encoder *blit = kk_blit_encoder(cmd); + for (int r = 0; r < pCopyBufferToImageInfo->regionCount; r++) { + const VkBufferImageCopy2 *region = &pCopyBufferToImageInfo->pRegions[r]; + const uint8_t plane_index = kk_image_memory_aspects_to_plane( + image, region->imageSubresource.aspectMask); + struct kk_image_plane *plane = &image->planes[plane_index]; + struct kk_buffer_image_copy_info info = + vk_buffer_image_copy_to_mtl_buffer_image_copy(region, plane); + info.mtl_data.buffer = buffer->mtl_handle; + info.mtl_data.image = plane->mtl_handle; + size_t buffer_offset = region->bufferOffset; + + kk_foreach_slice(slice, image, imageSubresource) + { + info.mtl_data.image_slice = slice; + info.mtl_data.buffer_offset_B = buffer_offset; + mtl_copy_from_buffer_to_texture(blit, &info.mtl_data); + buffer_offset += info.buffer_slice_size_B; + } + } +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdCopyImageToBuffer2(VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(kk_image, image, pCopyImageToBufferInfo->srcImage); + VK_FROM_HANDLE(kk_buffer, buffer, pCopyImageToBufferInfo->dstBuffer); + + mtl_blit_encoder *blit = kk_blit_encoder(cmd); + for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) { + const VkBufferImageCopy2 *region = &pCopyImageToBufferInfo->pRegions[r]; + const uint8_t plane_index = kk_image_memory_aspects_to_plane( + image, region->imageSubresource.aspectMask); + struct kk_image_plane *plane = &image->planes[plane_index]; + struct kk_buffer_image_copy_info info = + vk_buffer_image_copy_to_mtl_buffer_image_copy(region, plane); + info.mtl_data.buffer = buffer->mtl_handle; + info.mtl_data.image = plane->mtl_handle; + size_t buffer_offset = region->bufferOffset; + + kk_foreach_slice(slice, image, imageSubresource) + { + info.mtl_data.image_slice = slice; + info.mtl_data.buffer_offset_B = buffer_offset; + mtl_copy_from_texture_to_buffer(blit, &info.mtl_data); + buffer_offset += info.buffer_slice_size_B; + } + } +} + +struct copy_image_data { + struct kk_cmd_buffer *cmd; + struct kk_image *src; + struct kk_image *dst; + const VkImageCopy2 *regions; + uint32_t plane_index; + uint32_t region_count; +}; + +/* Copies images by doing a texture->buffer->texture transfer. 
This is required + * for compressed formats */ +static void +copy_through_buffer(struct copy_image_data *data) +{ + struct kk_image *src = data->src; + struct kk_image *dst = data->dst; + struct kk_image_plane *src_plane = &src->planes[data->plane_index]; + struct kk_image_plane *dst_plane = &dst->planes[data->plane_index]; + enum pipe_format src_format = src_plane->layout.format.pipe; + enum pipe_format dst_format = dst_plane->layout.format.pipe; + bool is_src_compressed = util_format_is_compressed(src_format); + bool is_dst_compressed = util_format_is_compressed(dst_format); + /* We shouldn't do any depth/stencil through this path */ + assert(!util_format_is_depth_or_stencil(src_format) || + !util_format_is_depth_or_stencil(dst_format)); + mtl_blit_encoder *blit = kk_blit_encoder(data->cmd); + + size_t buffer_size = 0u; + for (unsigned r = 0; r < data->region_count; r++) { + const VkImageCopy2 *region = &data->regions[r]; + const uint32_t buffer_stride_B = + util_format_get_stride(src_format, region->extent.width); + const uint32_t buffer_size_2d_B = util_format_get_2d_size( + src_format, buffer_stride_B, region->extent.height); + const uint32_t layer_count = + vk_image_subresource_layer_count(&src->vk, ®ion->srcSubresource); + buffer_size += buffer_size_2d_B * layer_count; + } + struct kk_bo *bo = kk_cmd_allocate_buffer(data->cmd, buffer_size, 8); + + size_t buffer_offset = 0u; + for (unsigned r = 0; r < data->region_count; r++) { + const VkImageCopy2 *region = &data->regions[r]; + uint32_t mip_level = region->srcSubresource.mipLevel; + const uint32_t mip_width = + u_minify(src_plane->layout.width_px, mip_level); + const uint32_t mip_height = + u_minify(src_plane->layout.height_px, mip_level); + const uint32_t stride_B = util_format_get_stride(src_format, mip_width); + const uint32_t size_2d_B = + util_format_get_2d_size(src_format, stride_B, mip_height); + const uint32_t buffer_stride_B = + util_format_get_stride(src_format, region->extent.width); + const uint32_t buffer_size_2d_B = util_format_get_2d_size( + src_format, buffer_stride_B, region->extent.height); + + struct kk_buffer_image_copy_info info; + + /* Metal requires this value to be 0 for 2D images, otherwise the number + * of bytes between each 2D image of a 3D texture */ + info.mtl_data.buffer_2d_image_size_B = + src_plane->layout.depth_px == 1u ? 0u : size_2d_B; + info.mtl_data.buffer_stride_B = buffer_stride_B; + info.mtl_data.image_level = mip_level; + info.mtl_data.buffer = bo->map; + info.mtl_data.options = MTL_BLIT_OPTION_NONE; + info.buffer_slice_size_B = buffer_size_2d_B; + struct mtl_size src_size = vk_extent_3d_to_mtl_size(®ion->extent); + struct mtl_size dst_size = vk_extent_3d_to_mtl_size(®ion->extent); + /* Need to adjust size to block dimensions */ + if (is_src_compressed) { + dst_size.x /= util_format_get_blockwidth(src_format); + dst_size.y /= util_format_get_blockheight(src_format); + dst_size.z /= util_format_get_blockdepth(src_format); + } + if (is_dst_compressed) { + dst_size.x *= util_format_get_blockwidth(dst_format); + dst_size.y *= util_format_get_blockheight(dst_format); + dst_size.z *= util_format_get_blockdepth(dst_format); + } + struct mtl_origin src_origin = + vk_offset_3d_to_mtl_origin(®ion->srcOffset); + struct mtl_origin dst_origin = + vk_offset_3d_to_mtl_origin(®ion->dstOffset); + + /* Texture->Buffer->Texture */ + // TODO_KOSMICKRISP We don't handle 3D to 2D array nor vice-versa in this + // path. Unsure if it's even needed, can compressed textures be 3D? 
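+ /* Each array layer round-trips through the staging buffer at its own
+ * offset: copy the source slice into the buffer, copy it back out into the
+ * destination slice, then advance buffer_offset by one tightly packed 2D
+ * slice. As a purely illustrative example, a 64x64 BC1 region with 3 layers
+ * (4x4 blocks, 8 bytes each) gives stride = (64 / 4) * 8 = 128 B and
+ * slice = 128 * (64 / 4) = 2048 B, so its layers land at offsets 0, 2048
+ * and 4096 within the region's portion of the staging buffer. */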
+ kk_foreach_slice(slice, src, srcSubresource) + { + info.mtl_data.image = src_plane->mtl_handle; + info.mtl_data.image_size = src_size; + info.mtl_data.image_origin = src_origin; + info.mtl_data.image_slice = slice; + info.mtl_data.buffer_offset_B = buffer_offset; + mtl_copy_from_texture_to_buffer(blit, &info.mtl_data); + + info.mtl_data.image = dst_plane->mtl_handle; + info.mtl_data.image_size = dst_size; + info.mtl_data.image_origin = dst_origin; + mtl_copy_from_buffer_to_texture(blit, &info.mtl_data); + + buffer_offset += info.buffer_slice_size_B; + } + } +} + +/* Copies images through Metal's texture->texture copy mechanism */ +static void +copy_image(struct copy_image_data *data) +{ + mtl_blit_encoder *blit = kk_blit_encoder(data->cmd); + for (unsigned r = 0; r < data->region_count; r++) { + const VkImageCopy2 *region = &data->regions[r]; + uint8_t src_plane_index = kk_image_aspects_to_plane( + data->src, region->srcSubresource.aspectMask); + if (data->plane_index != src_plane_index) + continue; + + uint8_t dst_plane_index = kk_image_aspects_to_plane( + data->dst, region->dstSubresource.aspectMask); + struct kk_image *src = data->src; + struct kk_image *dst = data->dst; + struct kk_image_plane *src_plane = &src->planes[src_plane_index]; + struct kk_image_plane *dst_plane = &dst->planes[dst_plane_index]; + + /* From the Vulkan 1.3.217 spec: + * + * "When copying between compressed and uncompressed formats the + * extent members represent the texel dimensions of the source image + * and not the destination." + */ + const VkExtent3D extent_px = + vk_image_sanitize_extent(&src->vk, region->extent); + + size_t src_slice = region->srcSubresource.baseArrayLayer; + size_t src_level = region->srcSubresource.mipLevel; + struct mtl_origin src_origin = + vk_offset_3d_to_mtl_origin(®ion->srcOffset); + struct mtl_size size = {.x = extent_px.width, + .y = extent_px.height, + .z = extent_px.depth}; + size_t dst_slice = region->dstSubresource.baseArrayLayer; + size_t dst_level = region->dstSubresource.mipLevel; + struct mtl_origin dst_origin = + vk_offset_3d_to_mtl_origin(®ion->dstOffset); + + /* When copying 3D to 2D layered or vice-versa, we need to change the 3D + * size to 2D and iterate on the layer count of the 2D image (which is the + * same as the depth of the 3D) and adjust origin and slice accordingly */ + uint32_t layer_count = + vk_image_subresource_layer_count(&src->vk, ®ion->srcSubresource); + const uint32_t dst_layer_count = + vk_image_subresource_layer_count(&dst->vk, ®ion->dstSubresource); + size_t *src_increase = &src_slice; + size_t *dst_increase = &dst_slice; + + if (layer_count < dst_layer_count) { /* 3D to 2D layered */ + layer_count = dst_layer_count; + src_increase = &src_origin.z; + size.z = 1u; + } else if (dst_layer_count < layer_count) { /* 2D layered to 3D */ + dst_increase = &dst_origin.z; + size.z = 1u; + } + for (uint32_t l = 0; l < layer_count; + ++l, ++(*src_increase), ++(*dst_increase)) { + mtl_copy_from_texture_to_texture( + blit, src_plane->mtl_handle, src_slice, src_level, src_origin, size, + dst_plane->mtl_handle, dst_slice, dst_level, dst_origin); + } + } +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdCopyImage2(VkCommandBuffer commandBuffer, + const VkCopyImageInfo2 *pCopyImageInfo) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(kk_image, src, pCopyImageInfo->srcImage); + VK_FROM_HANDLE(kk_image, dst, pCopyImageInfo->dstImage); + + for (uint32_t i = 0u; i < src->plane_count; ++i) { + struct kk_image_plane *src_plane = &src->planes[i]; + 
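+ /* Each image plane is backed by its own Metal texture, so the copy path
+ * (direct texture-to-texture blit vs. staging through a buffer) is chosen
+ * per plane from that plane's format below. */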
struct kk_image_plane *dst_plane = &dst->planes[i]; + enum pipe_format src_format = src_plane->layout.format.pipe; + enum pipe_format dst_format = dst_plane->layout.format.pipe; + struct copy_image_data data = { + .cmd = cmd, + .src = src, + .dst = dst, + .regions = pCopyImageInfo->pRegions, + .plane_index = i, + .region_count = pCopyImageInfo->regionCount, + }; + bool is_src_compressed = util_format_is_compressed(src_format); + bool is_dst_compressed = util_format_is_compressed(dst_format); + if (src_format != dst_format && (is_src_compressed || is_dst_compressed)) + copy_through_buffer(&data); + else + copy_image(&data); + } +} diff --git a/src/kosmickrisp/vulkan/kk_cmd_dispatch.c b/src/kosmickrisp/vulkan/kk_cmd_dispatch.c new file mode 100644 index 00000000000..338766ef12d --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_cmd_dispatch.c @@ -0,0 +1,152 @@ +/* + * Copyright © 2025 LunarG, Inc + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "vulkan/vulkan_core.h" + +#include "kk_buffer.h" +#include "kk_cmd_buffer.h" +#include "kk_descriptor_set_layout.h" +#include "kk_device.h" +#include "kk_encoder.h" +#include "kk_entrypoints.h" +#include "kk_shader.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" + +#include "vk_common_entrypoints.h" + +void +kk_cmd_dispatch_pipeline(struct kk_cmd_buffer *cmd, + mtl_compute_encoder *encoder, + mtl_compute_pipeline_state *pipeline, + const void *push_data, size_t push_size, + uint32_t groupCountX, uint32_t groupCountY, + uint32_t groupCountZ) +{ + struct kk_root_descriptor_table *root = NULL; + struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, sizeof(*root), 8u); + /* kk_cmd_allocate_buffer already sets the error, we can just exit */ + if (!bo) + return; + + root = bo->cpu; + assert(push_size <= sizeof(root->push)); + memcpy(root->push, push_data, push_size); + root->cs.base_group[0] = 1; /* TODO_KOSMICKRISP This is hard-coded because we + know this is the size we create them with */ + root->cs.base_group[1] = 1; + root->cs.base_group[2] = 1; + + mtl_compute_set_buffer(encoder, bo->map, 0, 0); + mtl_compute_set_pipeline_state(encoder, pipeline); + + struct mtl_size grid_size = { + .x = groupCountX, + .y = groupCountY, + .z = groupCountZ, + }; + struct mtl_size local_size = { + .x = 1, + .y = 1, + .z = 1, + }; + mtl_dispatch_threads(encoder, grid_size, local_size); +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t groupCountX, + uint32_t groupCountY, uint32_t groupCountZ) +{ + kk_CmdDispatchBase(commandBuffer, 0, 0, 0, groupCountX, groupCountY, + groupCountZ); +} + +static void +kk_flush_compute_state(struct kk_cmd_buffer *cmd) +{ + mtl_compute_encoder *enc = kk_compute_encoder(cmd); + struct kk_device *dev = kk_cmd_buffer_device(cmd); + + // Fill Metal argument buffer with descriptor set addresses + struct kk_descriptor_state *desc = &cmd->state.cs.descriptors; + + if (desc->push_dirty) + kk_cmd_buffer_flush_push_descriptors(cmd, desc); + /* After push descriptors' buffers are created. 
Otherwise, the buffer where + * they live will not be created and cannot make it resident */ + if (desc->sets_not_resident) + kk_make_descriptor_resources_resident(cmd, + VK_PIPELINE_BIND_POINT_COMPUTE); + if (desc->root_dirty) + kk_upload_descriptor_root(cmd, VK_PIPELINE_BIND_POINT_COMPUTE); + + /* Make user allocated heaps resident */ + simple_mtx_lock(&dev->user_heap_cache.mutex); + if (cmd->encoder->main.user_heap_hash != dev->user_heap_cache.hash) { + cmd->encoder->main.user_heap_hash = dev->user_heap_cache.hash; + mtl_heap **heaps = util_dynarray_begin(&dev->user_heap_cache.handles); + uint32_t count = + util_dynarray_num_elements(&dev->user_heap_cache.handles, mtl_heap *); + mtl_compute_use_heaps(enc, heaps, count); + } + simple_mtx_unlock(&dev->user_heap_cache.mutex); + + struct kk_bo *root_buffer = desc->root.root_buffer; + if (root_buffer) + mtl_compute_set_buffer(enc, root_buffer->map, 0, 0); + + mtl_compute_set_pipeline_state(enc, cmd->state.cs.pipeline_state); + cmd->state.cs.dirty = 0u; +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t baseGroupX, + uint32_t baseGroupY, uint32_t baseGroupZ, + uint32_t groupCountX, uint32_t groupCountY, + uint32_t groupCountZ) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + + struct kk_descriptor_state *desc = &cmd->state.cs.descriptors; + desc->root_dirty |= desc->root.cs.base_group[0] != baseGroupX; + desc->root_dirty |= desc->root.cs.base_group[1] != baseGroupY; + desc->root_dirty |= desc->root.cs.base_group[2] != baseGroupZ; + desc->root.cs.base_group[0] = baseGroupX; + desc->root.cs.base_group[1] = baseGroupY; + desc->root.cs.base_group[2] = baseGroupZ; + + kk_flush_compute_state(cmd); + + struct mtl_size grid_size = { + .x = groupCountX, + .y = groupCountY, + .z = groupCountZ, + }; + mtl_compute_encoder *enc = kk_compute_encoder(cmd); + mtl_dispatch_threads(enc, grid_size, cmd->state.cs.local_size); +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, + VkDeviceSize offset) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(kk_buffer, buffer, _buffer); + + struct kk_descriptor_state *desc = &cmd->state.cs.descriptors; + desc->root_dirty |= desc->root.cs.base_group[0] != 0; + desc->root_dirty |= desc->root.cs.base_group[1] != 0; + desc->root_dirty |= desc->root.cs.base_group[2] != 0; + desc->root.cs.base_group[0] = 0; + desc->root.cs.base_group[1] = 0; + desc->root.cs.base_group[2] = 0; + + kk_flush_compute_state(cmd); + + mtl_compute_encoder *enc = kk_compute_encoder(cmd); + mtl_dispatch_threadgroups_with_indirect_buffer( + enc, buffer->mtl_handle, offset, cmd->state.cs.local_size); +} diff --git a/src/kosmickrisp/vulkan/kk_cmd_draw.c b/src/kosmickrisp/vulkan/kk_cmd_draw.c new file mode 100644 index 00000000000..84008e68af6 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_cmd_draw.c @@ -0,0 +1,1010 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2024 Valve Corporation + * Copyright 2024 Alyssa Rosenzweig + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_entrypoints.h" + +#include "kk_buffer.h" +#include "kk_cmd_buffer.h" +#include "kk_encoder.h" +#include "kk_format.h" +#include "kk_image_view.h" +#include "kk_query_pool.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" +#include "kosmickrisp/bridge/vk_to_mtl_map.h" + +#include "vulkan/util/vk_format.h" + +static void +kk_cmd_buffer_dirty_render_pass(struct kk_cmd_buffer *cmd) +{ + struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state; + + /* These depend on color attachment count */ + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES); + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES); + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS); + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS); + + /* These depend on the depth/stencil format */ + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE); + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE); + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE); + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE); + + /* This may depend on render targets for ESO */ + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES); + + /* This may depend on render targets */ + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_COLOR_ATTACHMENT_MAP); +} + +static void +kk_attachment_init(struct kk_attachment *att, + const VkRenderingAttachmentInfo *info) +{ + if (info == NULL || info->imageView == VK_NULL_HANDLE) { + *att = (struct kk_attachment){ + .iview = NULL, + }; + return; + } + + VK_FROM_HANDLE(kk_image_view, iview, info->imageView); + *att = (struct kk_attachment){ + .vk_format = iview->vk.format, + .iview = iview, + }; + + if (info->resolveMode != VK_RESOLVE_MODE_NONE) { + VK_FROM_HANDLE(kk_image_view, res_iview, info->resolveImageView); + att->resolve_mode = info->resolveMode; + att->resolve_iview = res_iview; + } + + att->store_op = info->storeOp; +} + +VKAPI_ATTR void VKAPI_CALL +kk_GetRenderingAreaGranularityKHR( + VkDevice device, const VkRenderingAreaInfoKHR *pRenderingAreaInfo, + VkExtent2D *pGranularity) +{ + *pGranularity = (VkExtent2D){.width = 1, .height = 1}; +} + +static void +kk_merge_render_iview(VkExtent2D *extent, struct kk_image_view *iview) +{ + if (iview) { + /* TODO: is this right for ycbcr? */ + unsigned level = iview->vk.base_mip_level; + unsigned width = u_minify(iview->vk.image->extent.width, level); + unsigned height = u_minify(iview->vk.image->extent.height, level); + + extent->width = MAX2(extent->width, width); + extent->height = MAX2(extent->height, height); + } +} + +static void +kk_fill_common_attachment_description( + mtl_render_pass_attachment_descriptor *descriptor, + const struct kk_image_view *iview, const VkRenderingAttachmentInfo *info, + bool force_attachment_load) +{ + assert(iview->plane_count == + 1); /* TODO_KOSMICKRISP Handle multiplanar images? */ + mtl_render_pass_attachment_descriptor_set_texture( + descriptor, iview->planes[0].mtl_handle_render); + mtl_render_pass_attachment_descriptor_set_level(descriptor, + iview->vk.base_mip_level); + mtl_render_pass_attachment_descriptor_set_slice(descriptor, + iview->vk.base_array_layer); + enum mtl_load_action load_action = + force_attachment_load + ? 
MTL_LOAD_ACTION_LOAD + : vk_attachment_load_op_to_mtl_load_action(info->loadOp); + mtl_render_pass_attachment_descriptor_set_load_action(descriptor, + load_action); + /* We need to force attachment store to correctly handle situations where the + * attachment is written to in one subpass and later read from in the next one + * with a store operation other than store. The other reason is that we break + * render passes when a pipeline barrier is used, so we must not lose the + * attachment's contents when we restart the pass. */ + enum mtl_store_action store_action = MTL_STORE_ACTION_STORE; + mtl_render_pass_attachment_descriptor_set_store_action(descriptor, + store_action); +} + +static struct mtl_clear_color +vk_clear_color_value_to_mtl_clear_color(union VkClearColorValue color, + enum pipe_format format) +{ + struct mtl_clear_color value; + if (util_format_is_pure_sint(format)) { + value.red = color.int32[0]; + value.green = color.int32[1]; + value.blue = color.int32[2]; + value.alpha = color.int32[3]; + } else if (util_format_is_pure_uint(format)) { + value.red = color.uint32[0]; + value.green = color.uint32[1]; + value.blue = color.uint32[2]; + value.alpha = color.uint32[3]; + } else { + value.red = color.float32[0]; + value.green = color.float32[1]; + value.blue = color.float32[2]; + value.alpha = color.float32[3]; + } + + /* Apply swizzle to color since Metal does not allow swizzle for renderable + * textures, but we need to support that for formats like + * VK_FORMAT_B4G4R4A4_UNORM_PACK16 */ + const struct kk_va_format *supported_format = kk_get_va_format(format); + struct mtl_clear_color swizzled_color; + for (uint32_t i = 0u; i < 4; ++i) + swizzled_color.channel[i] = + value.channel[supported_format->swizzle.channels[i]]; + + return swizzled_color; +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdBeginRendering(VkCommandBuffer commandBuffer, + const VkRenderingInfo *pRenderingInfo) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + struct kk_device *dev = kk_cmd_buffer_device(cmd); + + struct kk_rendering_state *render = &cmd->state.gfx.render; + + memset(render, 0, sizeof(*render)); + + render->flags = pRenderingInfo->flags; + render->area = pRenderingInfo->renderArea; + render->view_mask = pRenderingInfo->viewMask; + render->layer_count = pRenderingInfo->layerCount; + render->samples = 0; + render->color_att_count = pRenderingInfo->colorAttachmentCount; + + const uint32_t layer_count = render->view_mask + ? 
util_last_bit(render->view_mask) + : render->layer_count; + + VkExtent2D framebuffer_extent = {.width = 0u, .height = 0u}; + bool does_any_attachment_clear = false; + for (uint32_t i = 0; i < render->color_att_count; i++) { + kk_attachment_init(&render->color_att[i], + &pRenderingInfo->pColorAttachments[i]); + kk_merge_render_iview(&framebuffer_extent, render->color_att[i].iview); + does_any_attachment_clear |= + (pRenderingInfo->pColorAttachments[i].loadOp == + VK_ATTACHMENT_LOAD_OP_CLEAR); + } + if (pRenderingInfo->pDepthAttachment) + does_any_attachment_clear |= (pRenderingInfo->pDepthAttachment->loadOp == + VK_ATTACHMENT_LOAD_OP_CLEAR); + if (pRenderingInfo->pStencilAttachment) + does_any_attachment_clear |= + (pRenderingInfo->pStencilAttachment->loadOp == + VK_ATTACHMENT_LOAD_OP_CLEAR); + + kk_attachment_init(&render->depth_att, pRenderingInfo->pDepthAttachment); + kk_attachment_init(&render->stencil_att, pRenderingInfo->pStencilAttachment); + kk_merge_render_iview(&framebuffer_extent, + render->depth_att.iview ?: render->stencil_att.iview); + + const VkRenderingFragmentShadingRateAttachmentInfoKHR *fsr_att_info = + vk_find_struct_const(pRenderingInfo->pNext, + RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR); + if (fsr_att_info != NULL && fsr_att_info->imageView != VK_NULL_HANDLE) { + VK_FROM_HANDLE(kk_image_view, iview, fsr_att_info->imageView); + render->fsr_att = (struct kk_attachment){ + .vk_format = iview->vk.format, + .iview = iview, + .store_op = VK_ATTACHMENT_STORE_OP_NONE, + }; + } + + const VkRenderingAttachmentLocationInfoKHR ral_info = { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_LOCATION_INFO_KHR, + .colorAttachmentCount = pRenderingInfo->colorAttachmentCount, + }; + vk_cmd_set_rendering_attachment_locations(&cmd->vk, &ral_info); + + kk_cmd_buffer_dirty_render_pass(cmd); + mtl_render_pass_descriptor *pass_descriptor = + mtl_new_render_pass_descriptor(); + + /* Framebufferless rendering: we need to set the pass descriptor's + * renderTargetWidth/Height to non-zero values and defaultRasterSampleCount */ + if (framebuffer_extent.width == 0u && framebuffer_extent.height == 0u) { + framebuffer_extent.width = render->area.extent.width; + framebuffer_extent.height = render->area.extent.height; + mtl_render_pass_descriptor_set_render_target_width( + pass_descriptor, framebuffer_extent.width); + mtl_render_pass_descriptor_set_render_target_height( + pass_descriptor, framebuffer_extent.height); + mtl_render_pass_descriptor_set_default_raster_sample_count( + pass_descriptor, 1u); + } + + /* Check whether we are rendering to the whole framebuffer. We need this to + * decide whether to force a load: a clear load op wipes the whole attachment, + * so it is only safe when the render area covers it entirely. + */ + bool is_whole_framebuffer = + framebuffer_extent.width == render->area.extent.width && + framebuffer_extent.height == render->area.extent.height && + render->area.offset.x == 0u && render->area.offset.y == 0u && + (render->view_mask == 0u || + render->view_mask == BITFIELD64_MASK(render->layer_count)); + + /* Check whether the render area is tile aligned so we know if we actually + * need to load the tiles to avoid losing their existing contents. 
*/ + uint32_t tile_alignment = 31u; + bool is_tile_aligned = !(render->area.offset.x & tile_alignment) && + !(render->area.offset.y & tile_alignment) && + !(render->area.extent.width & tile_alignment) && + !(render->area.extent.height & tile_alignment); + + /* Rendering to the whole framebuffer */ + is_tile_aligned |= is_whole_framebuffer; + + /* There are 3 cases where we need to force a load instead of using the user + * defined load operation: + * 1. Render area is not tile aligned + * 2. Load operation is clear but doesn't render to the whole attachment + * 3. Resuming renderpass + */ + bool force_attachment_load = + !is_tile_aligned || + (!is_whole_framebuffer && does_any_attachment_clear) || + (render->flags & VK_RENDERING_RESUMING_BIT); + + for (uint32_t i = 0; i < render->color_att_count; i++) { + const struct kk_image_view *iview = render->color_att[i].iview; + if (!iview) + continue; + + assert(iview->plane_count == + 1); /* TODO_KOSMICKRISP Handle multiplanar images? */ + const struct kk_image *image = + container_of(iview->vk.image, struct kk_image, vk); + render->samples = image->vk.samples; + + mtl_render_pass_attachment_descriptor *attachment_descriptor = + mtl_render_pass_descriptor_get_color_attachment(pass_descriptor, i); + kk_fill_common_attachment_description( + attachment_descriptor, iview, &pRenderingInfo->pColorAttachments[i], + force_attachment_load); + struct mtl_clear_color clear_color = + vk_clear_color_value_to_mtl_clear_color( + pRenderingInfo->pColorAttachments[i].clearValue.color, + iview->planes[0].format); + mtl_render_pass_attachment_descriptor_set_clear_color( + attachment_descriptor, clear_color); + } + + if (render->depth_att.iview) { + const struct kk_image_view *iview = render->depth_att.iview; + const struct kk_image *image = + container_of(iview->vk.image, struct kk_image, vk); + render->samples = image->vk.samples; + + mtl_render_pass_attachment_descriptor *attachment_descriptor = + mtl_render_pass_descriptor_get_depth_attachment(pass_descriptor); + kk_fill_common_attachment_description( + attachment_descriptor, render->depth_att.iview, + pRenderingInfo->pDepthAttachment, force_attachment_load); + mtl_render_pass_attachment_descriptor_set_clear_depth( + attachment_descriptor, + pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth); + } + if (render->stencil_att.iview) { + const struct kk_image_view *iview = render->stencil_att.iview; + const struct kk_image *image = + container_of(iview->vk.image, struct kk_image, vk); + render->samples = image->vk.samples; + + mtl_render_pass_attachment_descriptor *attachment_descriptor = + mtl_render_pass_descriptor_get_stencil_attachment(pass_descriptor); + kk_fill_common_attachment_description( + attachment_descriptor, render->stencil_att.iview, + pRenderingInfo->pStencilAttachment, force_attachment_load); + mtl_render_pass_attachment_descriptor_set_clear_stencil( + attachment_descriptor, + pRenderingInfo->pStencilAttachment->clearValue.depthStencil.stencil); + } + + /* Render targets are always arrays */ + mtl_render_pass_descriptor_set_render_target_array_length( + pass_descriptor, layer_count ? 
layer_count : 1u); + + /* Set global visibility buffer */ + mtl_render_pass_descriptor_set_visibility_buffer( + pass_descriptor, dev->occlusion_queries.bo->map); + + // TODO_KOSMICKRISP Fragment shading rate support goes here if Metal supports + // it + + /* Start new encoder and encode sync commands from previous barriers (aka + * fences) */ + kk_encoder_start_render(cmd, pass_descriptor, render->view_mask); + + /* Store descriptor in case we need to restart the pass at pipeline barrier, + * but force loads */ + for (uint32_t i = 0; i < render->color_att_count; i++) { + const struct kk_image_view *iview = render->color_att[i].iview; + if (!iview) + continue; + mtl_render_pass_attachment_descriptor *attachment_descriptor = + mtl_render_pass_descriptor_get_color_attachment(pass_descriptor, i); + mtl_render_pass_attachment_descriptor_set_load_action( + attachment_descriptor, MTL_LOAD_ACTION_LOAD); + } + if (render->depth_att.iview) { + mtl_render_pass_attachment_descriptor *attachment_descriptor = + mtl_render_pass_descriptor_get_depth_attachment(pass_descriptor); + mtl_render_pass_attachment_descriptor_set_load_action( + attachment_descriptor, MTL_LOAD_ACTION_LOAD); + } + if (render->stencil_att.iview) { + mtl_render_pass_attachment_descriptor *attachment_descriptor = + mtl_render_pass_descriptor_get_stencil_attachment(pass_descriptor); + mtl_render_pass_attachment_descriptor_set_load_action( + attachment_descriptor, MTL_LOAD_ACTION_LOAD); + } + cmd->state.gfx.render_pass_descriptor = pass_descriptor; + + kk_cmd_buffer_dirty_all_gfx(cmd); + + if (render->flags & VK_RENDERING_RESUMING_BIT) + return; + + /* Clear attachments if we forced a load and there's a clear */ + if (!force_attachment_load || !does_any_attachment_clear) + return; + + uint32_t clear_count = 0; + VkClearAttachment clear_att[KK_MAX_RTS + 1]; + for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) { + const VkRenderingAttachmentInfo *att_info = + &pRenderingInfo->pColorAttachments[i]; + if (att_info->imageView == VK_NULL_HANDLE || + att_info->loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR) + continue; + + clear_att[clear_count++] = (VkClearAttachment){ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .colorAttachment = i, + .clearValue = att_info->clearValue, + }; + } + + clear_att[clear_count] = (VkClearAttachment){ + .aspectMask = 0, + }; + if (pRenderingInfo->pDepthAttachment != NULL && + pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE && + pRenderingInfo->pDepthAttachment->loadOp == + VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_att[clear_count].aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT; + clear_att[clear_count].clearValue.depthStencil.depth = + pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth; + } + if (pRenderingInfo->pStencilAttachment != NULL && + pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE && + pRenderingInfo->pStencilAttachment->loadOp == + VK_ATTACHMENT_LOAD_OP_CLEAR) { + clear_att[clear_count].aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; + clear_att[clear_count].clearValue.depthStencil.stencil = + pRenderingInfo->pStencilAttachment->clearValue.depthStencil.stencil; + } + if (clear_att[clear_count].aspectMask != 0) + clear_count++; + + if (clear_count > 0) { + const VkClearRect clear_rect = { + .rect = render->area, + .baseArrayLayer = 0, + .layerCount = render->view_mask ? 
1 : render->layer_count, + }; + + kk_CmdClearAttachments(kk_cmd_buffer_to_handle(cmd), clear_count, + clear_att, 1, &clear_rect); + } +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdEndRendering(VkCommandBuffer commandBuffer) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + struct kk_rendering_state *render = &cmd->state.gfx.render; + bool need_resolve = false; + + /* Translate render state back to VK for meta */ + VkRenderingAttachmentInfo vk_color_att[KK_MAX_RTS]; + for (uint32_t i = 0; i < render->color_att_count; i++) { + if (render->color_att[i].resolve_mode != VK_RESOLVE_MODE_NONE) + need_resolve = true; + + vk_color_att[i] = (VkRenderingAttachmentInfo){ + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + .imageView = kk_image_view_to_handle(render->color_att[i].iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + .resolveMode = render->color_att[i].resolve_mode, + .resolveImageView = + kk_image_view_to_handle(render->color_att[i].resolve_iview), + .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + } + + const VkRenderingAttachmentInfo vk_depth_att = { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + .imageView = kk_image_view_to_handle(render->depth_att.iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + .resolveMode = render->depth_att.resolve_mode, + .resolveImageView = + kk_image_view_to_handle(render->depth_att.resolve_iview), + .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + if (render->depth_att.resolve_mode != VK_RESOLVE_MODE_NONE) + need_resolve = true; + + const VkRenderingAttachmentInfo vk_stencil_att = { + .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, + .imageView = kk_image_view_to_handle(render->stencil_att.iview), + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + .resolveMode = render->stencil_att.resolve_mode, + .resolveImageView = + kk_image_view_to_handle(render->stencil_att.resolve_iview), + .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + if (render->stencil_att.resolve_mode != VK_RESOLVE_MODE_NONE) + need_resolve = true; + + const VkRenderingInfo vk_render = { + .sType = VK_STRUCTURE_TYPE_RENDERING_INFO, + .renderArea = render->area, + .layerCount = render->layer_count, + .viewMask = render->view_mask, + .colorAttachmentCount = render->color_att_count, + .pColorAttachments = vk_color_att, + .pDepthAttachment = &vk_depth_att, + .pStencilAttachment = &vk_stencil_att, + }; + + /* Clean up previous encoder */ + kk_encoder_signal_fence_and_end(cmd); + mtl_release(cmd->state.gfx.render_pass_descriptor); + cmd->state.gfx.render_pass_descriptor = NULL; + + if (render->flags & VK_RENDERING_SUSPENDING_BIT) + need_resolve = false; + + memset(render, 0, sizeof(*render)); + + if (need_resolve) { + kk_meta_resolve_rendering(cmd, &vk_render); + } +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdBindIndexBuffer2KHR(VkCommandBuffer commandBuffer, VkBuffer _buffer, + VkDeviceSize offset, VkDeviceSize size, + VkIndexType indexType) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(kk_buffer, buffer, _buffer); + + cmd->state.gfx.index.handle = buffer->mtl_handle; + cmd->state.gfx.index.size = size; + cmd->state.gfx.index.offset = offset; + cmd->state.gfx.index.bytes_per_index = vk_index_type_to_bytes(indexType); + cmd->state.gfx.index.restart = vk_index_to_restart(indexType); +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, + uint32_t bindingCount, const VkBuffer *pBuffers, + const VkDeviceSize *pOffsets, + const VkDeviceSize *pSizes, + const VkDeviceSize *pStrides) +{ + 
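/* Nothing is bound on the Metal encoder here: strides are routed through the + * common dynamic state, while the buffer handles and address ranges are only + * cached and picked up by kk_flush_draw_state() via KK_DIRTY_VB at draw time. */ +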
VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + + if (pStrides) { + vk_cmd_set_vertex_binding_strides(&cmd->vk, firstBinding, bindingCount, + pStrides); + } + + for (uint32_t i = 0; i < bindingCount; i++) { + VK_FROM_HANDLE(kk_buffer, buffer, pBuffers[i]); + uint32_t idx = firstBinding + i; + uint64_t size = pSizes ? pSizes[i] : VK_WHOLE_SIZE; + const struct kk_addr_range addr_range = + kk_buffer_addr_range(buffer, pOffsets[i], size); + cmd->state.gfx.vb.addr_range[idx] = addr_range; + cmd->state.gfx.vb.handles[idx] = buffer->mtl_handle; + cmd->state.gfx.dirty |= KK_DIRTY_VB; + } +} + +static void +kk_flush_vp_state(struct kk_cmd_buffer *cmd) +{ + const struct vk_dynamic_graphics_state *dyn = + &cmd->vk.dynamic_graphics_state; + + /* We always need at least 1 viewport for the hardware. With rasterizer + * discard the app may not supply any, but we can just program garbage. + */ + unsigned count = MAX2(dyn->vp.scissor_count, 1); + + /* Need to clamp scissor rectangles to render area, otherwise Metal doesn't + * like it */ + struct mtl_scissor_rect rects[KK_MAX_VIEWPORTS] = {0}; + VkOffset2D origin = cmd->state.gfx.render.area.offset; + VkOffset2D end = {.x = origin.x + cmd->state.gfx.render.area.extent.width, + .y = origin.y + cmd->state.gfx.render.area.extent.height}; + for (uint32_t i = 0; i < dyn->vp.scissor_count; i++) { + const VkRect2D *rect = &dyn->vp.scissors[i]; + + size_t x0 = CLAMP(rect->offset.x, origin.x, end.x); + size_t x1 = CLAMP(rect->offset.x + rect->extent.width, origin.x, end.x); + size_t y0 = CLAMP(rect->offset.y, origin.y, end.y); + size_t y1 = CLAMP(rect->offset.y + rect->extent.height, origin.y, end.y); + size_t minx = MIN2(x0, x1); + size_t miny = MIN2(y0, y1); + size_t maxx = MAX2(x0, x1); + size_t maxy = MAX2(y0, y1); + rects[i].x = minx; + rects[i].y = miny; + rects[i].width = maxx - minx; + rects[i].height = maxy - miny; + } + + mtl_set_scissor_rects(kk_render_encoder(cmd), rects, count); + + count = MAX2(dyn->vp.viewport_count, 1); + + struct mtl_viewport viewports[KK_MAX_VIEWPORTS] = {0}; + + /* NDC in Metal is pointing downwards. Vulkan is pointing upwards. Account + * for that here */ + for (uint32_t i = 0; i < dyn->vp.viewport_count; i++) { + const VkViewport *vp = &dyn->vp.viewports[i]; + + viewports[i].originX = vp->x; + viewports[i].originY = vp->y + vp->height; + viewports[i].width = vp->width; + viewports[i].height = -vp->height; + + viewports[i].znear = vp->minDepth; + viewports[i].zfar = vp->maxDepth; + } + + mtl_set_viewports(kk_render_encoder(cmd), viewports, count); +} + +static inline uint32_t +kk_calculate_vbo_clamp(uint64_t vbuf, uint64_t sink, enum pipe_format format, + uint32_t size_B, uint32_t stride_B, uint32_t offset_B, + uint64_t *vbuf_out) +{ + unsigned elsize_B = util_format_get_blocksize(format); + unsigned subtracted_B = offset_B + elsize_B; + + /* If at least one index is valid, determine the max. Otherwise, direct reads + * to zero. + */ + if (size_B >= subtracted_B) { + *vbuf_out = vbuf + offset_B; + + /* If stride is zero, do not clamp, everything is valid. 
*/ + if (stride_B) + return ((size_B - subtracted_B) / stride_B); + else + return UINT32_MAX; + } else { + *vbuf_out = sink; + return 0; + } +} + +static void +set_empty_scissor(mtl_render_encoder *enc) +{ + struct mtl_scissor_rect rect = {.x = 0u, .y = 0u, .width = 0u, .height = 0u}; + mtl_set_scissor_rects(enc, &rect, 1); +} + +/* TODO_KOSMICKRISP: Move to common */ +static inline enum mesa_prim +vk_conv_topology(VkPrimitiveTopology topology) +{ + switch (topology) { + case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: + return MESA_PRIM_POINTS; + case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: + return MESA_PRIM_LINES; + case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: + return MESA_PRIM_LINE_STRIP; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wswitch" + case VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA: +#pragma GCC diagnostic pop + return MESA_PRIM_TRIANGLES; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: + return MESA_PRIM_TRIANGLE_STRIP; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: + return MESA_PRIM_TRIANGLE_FAN; + case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: + return MESA_PRIM_LINES_ADJACENCY; + case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: + return MESA_PRIM_LINE_STRIP_ADJACENCY; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: + return MESA_PRIM_TRIANGLES_ADJACENCY; + case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: + return MESA_PRIM_TRIANGLE_STRIP_ADJACENCY; + case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: + return MESA_PRIM_PATCHES; + default: + UNREACHABLE("invalid"); + } +} + +static void +kk_flush_draw_state(struct kk_cmd_buffer *cmd) +{ + struct kk_device *device = kk_cmd_buffer_device(cmd); + struct kk_graphics_state *gfx = &cmd->state.gfx; + struct vk_dynamic_graphics_state *dyn = &cmd->vk.dynamic_graphics_state; + struct kk_descriptor_state *desc = &cmd->state.gfx.descriptors; + mtl_render_encoder *enc = kk_render_encoder(cmd); + + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDING_STRIDES)) { + u_foreach_bit(ndx, dyn->vi->bindings_valid) { + desc->root.draw.buffer_strides[ndx] = dyn->vi_binding_strides[ndx]; + } + desc->root_dirty = true; + } + + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE)) { + if (dyn->rs.rasterizer_discard_enable) { + set_empty_scissor(enc); + } else { + /* Enforce setting the correct scissors */ + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT); + } + } + + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE)) { + gfx->is_cull_front_and_back = + dyn->rs.cull_mode == VK_CULL_MODE_FRONT_AND_BACK; + if (gfx->is_cull_front_and_back) { + set_empty_scissor(enc); + } else { + mtl_set_cull_mode(enc, + vk_front_face_to_mtl_cull_mode(dyn->rs.cull_mode)); + /* Enforce setting the correct scissors */ + BITSET_SET(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT); + } + } + + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY)) { + gfx->primitive_type = vk_primitive_topology_to_mtl_primitive_type( + dyn->ia.primitive_topology); + gfx->prim = vk_conv_topology(dyn->ia.primitive_topology); + } + + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) { + gfx->restart_disabled = !dyn->ia.primitive_restart_enable; + } + + /* We enable raster discard by setting scissor to size (0, 0) */ + if (!(dyn->rs.rasterizer_discard_enable || gfx->is_cull_front_and_back) && + (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS) || + BITSET_TEST(dyn->dirty, 
MESA_VK_DYNAMIC_VP_SCISSOR_COUNT) || + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS))) + kk_flush_vp_state(cmd); + + if (cmd->state.gfx.is_depth_stencil_dynamic && + (cmd->state.gfx.render.depth_att.vk_format != VK_FORMAT_UNDEFINED || + cmd->state.gfx.render.stencil_att.vk_format != VK_FORMAT_UNDEFINED) && + (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) | + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) | + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) | + // BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) + // | BITSET_TEST(dyn->dirty, + // MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS) | + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) | + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) | + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) | + BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK))) { + kk_cmd_release_dynamic_ds_state(cmd); + + bool has_depth = dyn->rp.attachments & MESA_VK_RP_ATTACHMENT_DEPTH_BIT; + bool has_stencil = + dyn->rp.attachments & MESA_VK_RP_ATTACHMENT_STENCIL_BIT; + gfx->depth_stencil_state = kk_compile_depth_stencil_state( + device, &dyn->ds, has_depth, has_stencil); + mtl_set_depth_stencil_state(enc, gfx->depth_stencil_state); + } + + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE)) { + mtl_set_front_face_winding( + enc, vk_front_face_to_mtl_winding( + cmd->vk.dynamic_graphics_state.rs.front_face)); + } + + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS)) { + mtl_set_depth_bias(enc, dyn->rs.depth_bias.constant_factor, + dyn->rs.depth_bias.slope_factor, + dyn->rs.depth_bias.clamp); + } + + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE)) { + enum mtl_depth_clip_mode mode = dyn->rs.depth_clamp_enable + ? 
MTL_DEPTH_CLIP_MODE_CLAMP + : MTL_DEPTH_CLIP_MODE_CLIP; + mtl_set_depth_clip_mode(enc, mode); + } + + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) + mtl_set_stencil_references( + enc, cmd->vk.dynamic_graphics_state.ds.stencil.front.reference, + cmd->vk.dynamic_graphics_state.ds.stencil.back.reference); + + if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) { + static_assert(sizeof(desc->root.draw.blend_constant) == + sizeof(dyn->cb.blend_constants), + "common size"); + + memcpy(desc->root.draw.blend_constant, dyn->cb.blend_constants, + sizeof(dyn->cb.blend_constants)); + desc->root_dirty = true; + } + + if (gfx->dirty & KK_DIRTY_VB) { + unsigned slot = 0; + cmd->state.gfx.vb.max_vertices = 0u; + u_foreach_bit(i, cmd->state.gfx.vb.attribs_read) { + if (dyn->vi->attributes_valid & BITFIELD_BIT(i)) { + struct vk_vertex_attribute_state attr = dyn->vi->attributes[i]; + struct kk_addr_range vb = gfx->vb.addr_range[attr.binding]; + + mtl_render_use_resource(enc, gfx->vb.handles[attr.binding], + MTL_RESOURCE_USAGE_READ); + desc->root.draw.attrib_clamps[slot] = kk_calculate_vbo_clamp( + vb.addr, 0, vk_format_to_pipe_format(attr.format), vb.range, + dyn->vi_binding_strides[attr.binding], attr.offset, + &desc->root.draw.attrib_base[slot]); + desc->root.draw.buffer_strides[attr.binding] = + dyn->vi_binding_strides[attr.binding]; + + cmd->state.gfx.vb.max_vertices = + MAX2(vb.range / dyn->vi_binding_strides[attr.binding], + cmd->state.gfx.vb.max_vertices); + } + slot++; + } + desc->root_dirty = true; + } + + if (gfx->dirty & KK_DIRTY_PIPELINE) { + mtl_render_set_pipeline_state(enc, gfx->pipeline_state); + if (gfx->depth_stencil_state) + mtl_set_depth_stencil_state(enc, gfx->depth_stencil_state); + } + + if (desc->push_dirty) + kk_cmd_buffer_flush_push_descriptors(cmd, desc); + /* After push descriptors' buffers are created. 
Otherwise, the buffer where + * they live will not be created and cannot make it resident */ + if (desc->sets_not_resident) + kk_make_descriptor_resources_resident(cmd, + VK_PIPELINE_BIND_POINT_GRAPHICS); + if (desc->root_dirty) + kk_upload_descriptor_root(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS); + + /* Make user allocated heaps resident */ + struct kk_device *dev = kk_cmd_buffer_device(cmd); + simple_mtx_lock(&dev->user_heap_cache.mutex); + if (cmd->encoder->main.user_heap_hash != dev->user_heap_cache.hash) { + cmd->encoder->main.user_heap_hash = dev->user_heap_cache.hash; + mtl_heap **heaps = util_dynarray_begin(&dev->user_heap_cache.handles); + uint32_t count = + util_dynarray_num_elements(&dev->user_heap_cache.handles, mtl_heap *); + mtl_render_use_heaps(enc, heaps, count); + } + simple_mtx_unlock(&dev->user_heap_cache.mutex); + + struct kk_bo *root_buffer = desc->root.root_buffer; + if (root_buffer) { + mtl_set_vertex_buffer(enc, root_buffer->map, 0, 0); + mtl_set_fragment_buffer(enc, root_buffer->map, 0, 0); + } + + if (gfx->dirty & KK_DIRTY_OCCLUSION) { + mtl_set_visibility_result_mode(enc, gfx->occlusion.mode, + gfx->occlusion.index * sizeof(uint64_t)); + } + + gfx->dirty = 0u; + vk_dynamic_graphics_state_clear_dirty(dyn); +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, + uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + + kk_flush_draw_state(cmd); + + /* Metal does not support triangle fans */ + bool requires_unroll = cmd->state.gfx.prim == MESA_PRIM_TRIANGLE_FAN; + if (requires_unroll) { + VkDrawIndirectCommand draw = { + .vertexCount = vertexCount, + .instanceCount = instanceCount, + .firstVertex = firstVertex, + .firstInstance = firstInstance, + }; + struct kk_pool pool = kk_pool_upload(cmd, &draw, sizeof(draw), 4u); + kk_encoder_render_triangle_fan_indirect(cmd, pool.handle, 0u); + } else { + mtl_render_encoder *enc = kk_render_encoder(cmd); + mtl_draw_primitives(enc, cmd->state.gfx.primitive_type, firstVertex, + vertexCount, instanceCount, firstInstance); + } +} + +static bool +requires_increasing_index_el_size(struct kk_cmd_buffer *cmd) +{ + enum mesa_prim prim = cmd->state.gfx.prim; + switch (prim) { + case MESA_PRIM_LINE_STRIP: + case MESA_PRIM_TRIANGLE_STRIP: + case MESA_PRIM_TRIANGLE_FAN: + return (cmd->state.gfx.restart_disabled && + cmd->state.gfx.index.bytes_per_index < sizeof(uint32_t)); + default: + return false; + } +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, + uint32_t instanceCount, uint32_t firstIndex, + int32_t vertexOffset, uint32_t firstInstance) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + + kk_flush_draw_state(cmd); + + /* Metal does not support triangle fans */ + bool requires_triangle_fan_unroll = + cmd->state.gfx.prim == MESA_PRIM_TRIANGLE_FAN; + + /* Metal does not support disabling primitive restart. We need to create a + * new index buffer for primitives that allow restart (line strip, triangle + * strip and triangle fan). 
Never ever support + * VK_EXT_primitive_topology_list_restart since it'll just add overhead */ + bool increase_index_el_size = requires_increasing_index_el_size(cmd); + if (requires_triangle_fan_unroll || increase_index_el_size) { + VkDrawIndexedIndirectCommand draw = { + .indexCount = indexCount, + .instanceCount = instanceCount, + .firstIndex = firstIndex, + .vertexOffset = vertexOffset, + .firstInstance = firstInstance, + }; + struct kk_pool pool = kk_pool_upload(cmd, &draw, sizeof(draw), 4u); + kk_encoder_render_triangle_fan_indexed_indirect(cmd, pool.handle, 0u, + increase_index_el_size); + } else { + uint32_t bytes_per_index = cmd->state.gfx.index.bytes_per_index; + enum mtl_index_type index_type = + index_size_in_bytes_to_mtl_index_type(bytes_per_index); + uint32_t index_buffer_offset_B = + firstIndex * bytes_per_index + cmd->state.gfx.index.offset; + + mtl_render_encoder *enc = kk_render_encoder(cmd); + mtl_draw_indexed_primitives( + enc, cmd->state.gfx.primitive_type, indexCount, index_type, + cmd->state.gfx.index.handle, index_buffer_offset_B, instanceCount, + vertexOffset, firstInstance); + } +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, + VkDeviceSize offset, uint32_t drawCount, uint32_t stride) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(kk_buffer, buffer, _buffer); + + kk_flush_draw_state(cmd); + mtl_render_encoder *enc = kk_render_encoder(cmd); + + /* Metal does not support triangle fans */ + bool requires_unroll = cmd->state.gfx.prim == MESA_PRIM_TRIANGLE_FAN; + for (uint32_t i = 0u; i < drawCount; ++i, offset += stride) { + if (requires_unroll) { + kk_encoder_render_triangle_fan_indirect(cmd, buffer->mtl_handle, + offset); + } else { + mtl_draw_primitives_indirect(enc, cmd->state.gfx.primitive_type, + buffer->mtl_handle, offset); + } + } +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, + VkDeviceSize offset, VkBuffer countBuffer, + VkDeviceSize countBufferOffset, uint32_t maxDrawCount, + uint32_t stride) +{ + /* TODO_KOSMICKRISP */ +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, + VkDeviceSize offset, uint32_t drawCount, + uint32_t stride) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(kk_buffer, buffer, _buffer); + + kk_flush_draw_state(cmd); + + /* Metal does not support triangle fans */ + bool requires_triangle_fan_unroll = + cmd->state.gfx.prim == MESA_PRIM_TRIANGLE_FAN; + + /* Metal does not support disabling primitive restart. We need to create a + * new index buffer for primitives that allow restart (line strip, triangle + * strip and triangle fan). 
Never ever support + * VK_EXT_primitive_topology_list_restart since it'll just add overhead */ + bool increase_index_el_size = requires_increasing_index_el_size(cmd); + for (uint32_t i = 0u; i < drawCount; ++i, offset += stride) { + if (requires_triangle_fan_unroll || increase_index_el_size) { + kk_encoder_render_triangle_fan_indexed_indirect( + cmd, buffer->mtl_handle, offset, increase_index_el_size); + } else { + uint32_t bytes_per_index = cmd->state.gfx.index.bytes_per_index; + enum mtl_index_type index_type = + index_size_in_bytes_to_mtl_index_type(bytes_per_index); + uint32_t index_buffer_offset = cmd->state.gfx.index.offset; + + mtl_render_encoder *enc = kk_render_encoder(cmd); + mtl_draw_indexed_primitives_indirect( + enc, cmd->state.gfx.primitive_type, index_type, + cmd->state.gfx.index.handle, index_buffer_offset, + buffer->mtl_handle, offset); + } + } +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, + VkDeviceSize offset, VkBuffer countBuffer, + VkDeviceSize countBufferOffset, + uint32_t maxDrawCount, uint32_t stride) +{ + /* TODO_KOSMICKRISP */ +} diff --git a/src/kosmickrisp/vulkan/kk_cmd_meta.c b/src/kosmickrisp/vulkan/kk_cmd_meta.c new file mode 100644 index 00000000000..debf9cd467c --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_cmd_meta.c @@ -0,0 +1,318 @@ +/* + * Copyright 2024 Valve Corporation + * Copyright 2024 Alyssa Rosenzweig + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_private.h" + +#include "kk_buffer.h" +#include "kk_cmd_buffer.h" +#include "kk_encoder.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" + +#include "kk_entrypoints.h" + +static VkResult +kk_cmd_bind_map_buffer(struct vk_command_buffer *vk_cmd, + struct vk_meta_device *meta, VkBuffer _buffer, + void **map_out) +{ + struct kk_cmd_buffer *cmd = container_of(vk_cmd, struct kk_cmd_buffer, vk); + VK_FROM_HANDLE(kk_buffer, buffer, _buffer); + + assert(buffer->vk.size < UINT_MAX); + struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, buffer->vk.size, 16u); + if (unlikely(bo == NULL)) + return VK_ERROR_OUT_OF_POOL_MEMORY; + + /* Need to retain since VkBuffers release the mtl_handle too */ + mtl_retain(bo->map); + buffer->mtl_handle = bo->map; + buffer->vk.device_address = bo->gpu; + *map_out = bo->cpu; + mtl_compute_use_resource(cmd->encoder->main.encoder, buffer->mtl_handle, + MTL_RESOURCE_USAGE_WRITE | MTL_RESOURCE_USAGE_READ); + return VK_SUCCESS; +} + +VkResult +kk_device_init_meta(struct kk_device *dev) +{ + VkResult result = vk_meta_device_init(&dev->vk, &dev->meta); + if (result != VK_SUCCESS) + return result; + + dev->meta.use_gs_for_layer = false; + dev->meta.use_stencil_export = true; + dev->meta.use_rect_list_pipeline = true; + dev->meta.cmd_bind_map_buffer = kk_cmd_bind_map_buffer; + dev->meta.max_bind_map_buffer_size_B = 64 * 1024; + + for (unsigned i = 0; i < VK_META_BUFFER_CHUNK_SIZE_COUNT; ++i) { + dev->meta.buffer_access.optimal_wg_size[i] = 64; + } + + return VK_SUCCESS; +} + +void +kk_device_finish_meta(struct kk_device *dev) +{ + vk_meta_device_finish(&dev->vk, &dev->meta); +} + +struct kk_meta_save { + struct vk_vertex_input_state _dynamic_vi; + struct vk_sample_locations_state _dynamic_sl; + struct vk_dynamic_graphics_state dynamic; + struct { + union { + struct { + mtl_render_pipeline_state *ps; + mtl_depth_stencil_state *ds; + uint32_t attribs_read; + enum mtl_primitive_type primitive_type; + enum 
mtl_visibility_result_mode occlusion; + bool is_ds_dynamic; + } gfx; + struct { + mtl_compute_pipeline_state *pipeline_state; + struct mtl_size local_size; + } cs; + }; + } pipeline; + struct kk_descriptor_set *desc0; + struct kk_push_descriptor_set *push_desc0; + mtl_buffer *vb0_handle; + struct kk_addr_range vb0; + struct kk_buffer_address desc0_set_addr; + bool has_push_desc0; + uint8_t push[KK_MAX_PUSH_SIZE]; +}; + +static void +kk_meta_begin(struct kk_cmd_buffer *cmd, struct kk_meta_save *save, + VkPipelineBindPoint bind_point) +{ + struct kk_descriptor_state *desc = kk_get_descriptors_state(cmd, bind_point); + + if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { + save->dynamic = cmd->vk.dynamic_graphics_state; + save->_dynamic_vi = cmd->state.gfx._dynamic_vi; + save->_dynamic_sl = cmd->state.gfx._dynamic_sl; + save->pipeline.gfx.ps = cmd->state.gfx.pipeline_state; + save->pipeline.gfx.ds = cmd->state.gfx.depth_stencil_state; + save->pipeline.gfx.attribs_read = cmd->state.gfx.vb.attribs_read; + save->pipeline.gfx.primitive_type = cmd->state.gfx.primitive_type; + save->pipeline.gfx.occlusion = cmd->state.gfx.occlusion.mode; + save->pipeline.gfx.is_ds_dynamic = + cmd->state.gfx.is_depth_stencil_dynamic; + + cmd->state.gfx.is_depth_stencil_dynamic = false; + cmd->state.gfx.depth_stencil_state = NULL; + cmd->state.gfx.occlusion.mode = MTL_VISIBILITY_RESULT_MODE_DISABLED; + cmd->state.gfx.dirty |= KK_DIRTY_OCCLUSION; + desc->root_dirty = true; + } else { + save->pipeline.cs.pipeline_state = cmd->state.cs.pipeline_state; + save->pipeline.cs.local_size = cmd->state.cs.local_size; + } + + save->vb0_handle = cmd->state.gfx.vb.handles[0]; + save->vb0 = cmd->state.gfx.vb.addr_range[0]; + + save->desc0 = desc->sets[0]; + save->has_push_desc0 = desc->push[0]; + if (save->has_push_desc0) + save->push_desc0 = desc->push[0]; + + static_assert(sizeof(save->push) == sizeof(desc->root.push), + "Size mismatch for push in meta_save"); + memcpy(save->push, desc->root.push, sizeof(save->push)); +} + +static void +kk_meta_end(struct kk_cmd_buffer *cmd, struct kk_meta_save *save, + VkPipelineBindPoint bind_point) +{ + struct kk_descriptor_state *desc = kk_get_descriptors_state(cmd, bind_point); + desc->root_dirty = true; + + if (save->desc0) { + desc->sets[0] = save->desc0; + desc->root.sets[0] = save->desc0->addr; + desc->set_sizes[0] = save->desc0->size; + desc->sets_not_resident |= BITFIELD_BIT(0); + desc->push_dirty &= ~BITFIELD_BIT(0); + } else if (save->has_push_desc0) { + desc->push[0] = save->push_desc0; + desc->sets_not_resident |= BITFIELD_BIT(0); + desc->push_dirty |= BITFIELD_BIT(0); + } + + if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { + /* Restore the dynamic state */ + assert(save->dynamic.vi == &cmd->state.gfx._dynamic_vi); + assert(save->dynamic.ms.sample_locations == &cmd->state.gfx._dynamic_sl); + cmd->vk.dynamic_graphics_state = save->dynamic; + cmd->state.gfx._dynamic_vi = save->_dynamic_vi; + cmd->state.gfx._dynamic_sl = save->_dynamic_sl; + memcpy(cmd->vk.dynamic_graphics_state.dirty, + cmd->vk.dynamic_graphics_state.set, + sizeof(cmd->vk.dynamic_graphics_state.set)); + + if (cmd->state.gfx.is_depth_stencil_dynamic) + mtl_release(cmd->state.gfx.depth_stencil_state); + cmd->state.gfx.pipeline_state = save->pipeline.gfx.ps; + cmd->state.gfx.depth_stencil_state = save->pipeline.gfx.ds; + cmd->state.gfx.primitive_type = save->pipeline.gfx.primitive_type; + cmd->state.gfx.vb.attribs_read = save->pipeline.gfx.attribs_read; + cmd->state.gfx.is_depth_stencil_dynamic = + 
save->pipeline.gfx.is_ds_dynamic; + cmd->state.gfx.dirty |= KK_DIRTY_PIPELINE; + + cmd->state.gfx.vb.addr_range[0] = save->vb0; + cmd->state.gfx.vb.handles[0] = save->vb0_handle; + cmd->state.gfx.dirty |= KK_DIRTY_VB; + + cmd->state.gfx.occlusion.mode = save->pipeline.gfx.occlusion; + cmd->state.gfx.dirty |= KK_DIRTY_OCCLUSION; + + desc->root_dirty = true; + } else { + cmd->state.cs.local_size = save->pipeline.cs.local_size; + cmd->state.cs.pipeline_state = save->pipeline.cs.pipeline_state; + } + + memcpy(desc->root.push, save->push, sizeof(save->push)); +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, + VkDeviceSize dstOffset, VkDeviceSize dstRange, uint32_t data) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(kk_buffer, buf, dstBuffer); + struct kk_device *dev = kk_cmd_buffer_device(cmd); + + struct kk_meta_save save; + kk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE); + mtl_compute_use_resource(kk_compute_encoder(cmd), buf->mtl_handle, + MTL_RESOURCE_USAGE_WRITE); + vk_meta_fill_buffer(&cmd->vk, &dev->meta, dstBuffer, dstOffset, dstRange, + data); + kk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE); +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, + VkDeviceSize dstOffset, VkDeviceSize dstRange, + const void *pData) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(kk_buffer, buf, dstBuffer); + struct kk_device *dev = kk_cmd_buffer_device(cmd); + + struct kk_meta_save save; + kk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE); + mtl_compute_use_resource(kk_compute_encoder(cmd), buf->mtl_handle, + MTL_RESOURCE_USAGE_WRITE); + vk_meta_update_buffer(&cmd->vk, &dev->meta, dstBuffer, dstOffset, dstRange, + pData); + kk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_COMPUTE); +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdBlitImage2(VkCommandBuffer commandBuffer, + const VkBlitImageInfo2 *pBlitImageInfo) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + struct kk_device *dev = kk_cmd_buffer_device(cmd); + + struct kk_meta_save save; + kk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS); + vk_meta_blit_image2(&cmd->vk, &dev->meta, pBlitImageInfo); + kk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS); +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdResolveImage2(VkCommandBuffer commandBuffer, + const VkResolveImageInfo2 *pResolveImageInfo) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + struct kk_device *dev = kk_cmd_buffer_device(cmd); + + struct kk_meta_save save; + kk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS); + vk_meta_resolve_image2(&cmd->vk, &dev->meta, pResolveImageInfo); + kk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS); +} + +static void +kk_meta_init_render(struct kk_cmd_buffer *cmd, + struct vk_meta_rendering_info *info) +{ + const struct kk_rendering_state *render = &cmd->state.gfx.render; + + *info = (struct vk_meta_rendering_info){ + .samples = MAX2(render->samples, 1), + .view_mask = render->view_mask, + .color_attachment_count = render->color_att_count, + .depth_attachment_format = render->depth_att.vk_format, + .stencil_attachment_format = render->stencil_att.vk_format, + }; + for (uint32_t a = 0; a < render->color_att_count; a++) { + info->color_attachment_formats[a] = render->color_att[a].vk_format; + info->color_attachment_write_masks[a] = + VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; + } +} + 
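/* kk_CmdClearAttachments is implemented through vk_meta, so the current + * graphics state is saved and restored around the meta draw. The vertex + * amplification count is dropped to a single view for that draw and rebuilt + * from the current view mask once the clear has been recorded. */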
+VKAPI_ATTR void VKAPI_CALL +kk_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, + const VkClearAttachment *pAttachments, + uint32_t rectCount, const VkClearRect *pRects) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + struct kk_device *dev = kk_cmd_buffer_device(cmd); + + struct vk_meta_rendering_info render_info; + kk_meta_init_render(cmd, &render_info); + + uint32_t view_mask = cmd->state.gfx.render.view_mask; + struct kk_encoder *encoder = cmd->encoder; + uint32_t layer_ids[KK_MAX_MULTIVIEW_VIEW_COUNT] = {}; + mtl_set_vertex_amplification_count(encoder->main.encoder, layer_ids, 1u); + + struct kk_meta_save save; + kk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS); + vk_meta_clear_attachments(&cmd->vk, &dev->meta, &render_info, + attachmentCount, pAttachments, rectCount, pRects); + kk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS); + + uint32_t count = 0u; + u_foreach_bit(id, view_mask) + layer_ids[count++] = id; + if (view_mask == 0u) { + layer_ids[count++] = 0; + } + mtl_set_vertex_amplification_count(encoder->main.encoder, layer_ids, count); +} + +void +kk_meta_resolve_rendering(struct kk_cmd_buffer *cmd, + const VkRenderingInfo *pRenderingInfo) +{ + struct kk_device *dev = kk_cmd_buffer_device(cmd); + + struct kk_meta_save save; + kk_meta_begin(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS); + vk_meta_resolve_rendering(&cmd->vk, &dev->meta, pRenderingInfo); + kk_meta_end(cmd, &save, VK_PIPELINE_BIND_POINT_GRAPHICS); +} diff --git a/src/kosmickrisp/vulkan/kk_cmd_pool.c b/src/kosmickrisp/vulkan/kk_cmd_pool.c new file mode 100644 index 00000000000..15e571c9723 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_cmd_pool.c @@ -0,0 +1,64 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_cmd_pool.h" + +#include "kk_device.h" +#include "kk_entrypoints.h" +#include "kk_physical_device.h" + +VKAPI_ATTR VkResult VKAPI_CALL +kk_CreateCommandPool(VkDevice _device, + const VkCommandPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkCommandPool *pCmdPool) +{ + VK_FROM_HANDLE(kk_device, device, _device); + struct kk_cmd_pool *pool; + + pool = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pool), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pool == NULL) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + VkResult result = + vk_command_pool_init(&device->vk, &pool->vk, pCreateInfo, pAllocator); + if (result != VK_SUCCESS) { + vk_free2(&device->vk.alloc, pAllocator, pool); + return result; + } + + list_inithead(&pool->free_mem); + list_inithead(&pool->free_gart_mem); + + *pCmdPool = kk_cmd_pool_to_handle(pool); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +kk_DestroyCommandPool(VkDevice _device, VkCommandPool commandPool, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(kk_device, device, _device); + VK_FROM_HANDLE(kk_cmd_pool, pool, commandPool); + + if (!pool) + return; + + vk_command_pool_finish(&pool->vk); + vk_free2(&device->vk.alloc, pAllocator, pool); +} + +VKAPI_ATTR void VKAPI_CALL +kk_TrimCommandPool(VkDevice device, VkCommandPool commandPool, + VkCommandPoolTrimFlags flags) +{ + VK_FROM_HANDLE(kk_cmd_pool, pool, commandPool); + + vk_command_pool_trim(&pool->vk, flags); +} diff --git a/src/kosmickrisp/vulkan/kk_cmd_pool.h b/src/kosmickrisp/vulkan/kk_cmd_pool.h new file mode 100644 index 00000000000..c3a58f78524 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_cmd_pool.h @@ -0,0 +1,32 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_CMD_POOL_H +#define KK_CMD_POOL_H + +#include "kk_private.h" + +#include "vk_command_pool.h" + +struct kk_cmd_pool { + struct vk_command_pool vk; + + /** List of nvk_cmd_mem */ + struct list_head free_mem; + struct list_head free_gart_mem; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(kk_cmd_pool, vk.base, VkCommandPool, + VK_OBJECT_TYPE_COMMAND_POOL) + +static inline struct kk_device * +kk_cmd_pool_device(struct kk_cmd_pool *pool) +{ + return (struct kk_device *)pool->vk.base.device; +} + +#endif /* KK_CMD_POOL_H */ diff --git a/src/kosmickrisp/vulkan/kk_debug.c b/src/kosmickrisp/vulkan/kk_debug.c new file mode 100644 index 00000000000..dfc0385be41 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_debug.c @@ -0,0 +1,22 @@ +/* + * Copyright 2025 LunarG, Inc. + * SPDX-License-Identifier: MIT + */ +#include "kk_debug.h" +#include "util/u_debug.h" + +enum kk_debug kk_mesa_debug_flags = 0; + +const struct debug_named_value flags[] = { + {"nir", KK_DEBUG_NIR}, + {"msl", KK_DEBUG_MSL}, + {NULL, 0}, +}; + +DEBUG_GET_ONCE_FLAGS_OPTION(mesa_kk_debug, "MESA_KK_DEBUG", flags, 0); + +void +kk_process_debug_variable(void) +{ + kk_mesa_debug_flags = debug_get_option_mesa_kk_debug(); +} diff --git a/src/kosmickrisp/vulkan/kk_debug.h b/src/kosmickrisp/vulkan/kk_debug.h new file mode 100644 index 00000000000..e50b3098c30 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_debug.h @@ -0,0 +1,21 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * SPDX-License-Identifier: MIT + */ +#ifndef KK_DEBUG_H +#define KK_DEBUG_H 1 + +enum kk_debug { + /* Print out the NIR from the compiler */ + KK_DEBUG_NIR = 1ull << 0, + /* Print out the generated MSL source code from the compiler */ + KK_DEBUG_MSL = 1ull << 1, +}; + +extern enum kk_debug kk_mesa_debug_flags; + +#define KK_DEBUG(flag) unlikely(kk_mesa_debug_flags &KK_DEBUG_##flag) + +extern void kk_process_debug_variable(void); + +#endif /* KK_DEBUG_H */ diff --git a/src/kosmickrisp/vulkan/kk_descriptor_set.c b/src/kosmickrisp/vulkan/kk_descriptor_set.c new file mode 100644 index 00000000000..0637fddc812 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_descriptor_set.c @@ -0,0 +1,806 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_descriptor_set.h" + +#include "kk_bo.h" +#include "kk_buffer.h" +#include "kk_buffer_view.h" +#include "kk_descriptor_set_layout.h" +#include "kk_device.h" +#include "kk_entrypoints.h" +#include "kk_image_view.h" +#include "kk_physical_device.h" +#include "kk_sampler.h" + +#include "util/format/u_format.h" + +static inline uint32_t +align_u32(uint32_t v, uint32_t a) +{ + assert(a != 0 && a == (a & -a)); + return (v + a - 1) & ~(a - 1); +} + +static inline void * +desc_ubo_data(struct kk_descriptor_set *set, uint32_t binding, uint32_t elem, + uint32_t *size_out) +{ + const struct kk_descriptor_set_binding_layout *binding_layout = + &set->layout->binding[binding]; + + uint32_t offset = binding_layout->offset + elem * binding_layout->stride; + assert(offset < set->size); + + if (size_out != NULL) + *size_out = set->size - offset; + + return (char *)set->mapped_ptr + offset; +} + +static void +write_desc(struct kk_descriptor_set *set, uint32_t binding, uint32_t elem, + const void *desc_data, size_t desc_size) +{ + ASSERTED uint32_t dst_size; + void *dst = desc_ubo_data(set, binding, elem, &dst_size); + assert(desc_size <= dst_size); + memcpy(dst, desc_data, desc_size); +} + +static void +get_sampled_image_view_desc(VkDescriptorType descriptor_type, + const VkDescriptorImageInfo *const info, void *dst, + size_t dst_size, bool is_input_attachment) +{ + struct kk_sampled_image_descriptor desc[3] = {}; + uint8_t plane_count = 1; + + if (descriptor_type != VK_DESCRIPTOR_TYPE_SAMPLER && info && + info->imageView != VK_NULL_HANDLE) { + VK_FROM_HANDLE(kk_image_view, view, info->imageView); + + plane_count = view->plane_count; + for (uint8_t plane = 0; plane < plane_count; plane++) { + if (is_input_attachment) { + assert(view->planes[plane].sampled_gpu_resource_id); + desc[plane].image_gpu_resource_id = + view->planes[plane].input_gpu_resource_id; + } else { + assert(view->planes[plane].sampled_gpu_resource_id); + desc[plane].image_gpu_resource_id = + view->planes[plane].sampled_gpu_resource_id; + } + } + } + + if (descriptor_type == VK_DESCRIPTOR_TYPE_SAMPLER || + descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { + VK_FROM_HANDLE(kk_sampler, sampler, info->sampler); + + plane_count = MAX2(plane_count, sampler->plane_count); + + for (uint8_t plane = 0; plane < plane_count; plane++) { + /* We need to replicate the last sampler plane out to all image + * planes due to sampler table entry limitations. See + * nvk_CreateSampler in nvk_sampler.c for more details. 
+ */ + uint8_t sampler_plane = MIN2(plane, sampler->plane_count - 1u); + assert(sampler->planes[sampler_plane].hw->handle); + desc[plane].sampler_index = sampler->planes[sampler_plane].hw->index; + desc[plane].lod_bias_fp16 = sampler->lod_bias_fp16; + desc[plane].lod_min_fp16 = sampler->lod_min_fp16; + desc[plane].lod_max_fp16 = sampler->lod_max_fp16; + } + } + + assert(sizeof(desc[0]) * plane_count <= dst_size); + memcpy(dst, desc, sizeof(desc[0]) * plane_count); +} + +static void +write_sampled_image_view_desc(struct kk_descriptor_set *set, + const VkDescriptorImageInfo *const _info, + uint32_t binding, uint32_t elem, + VkDescriptorType descriptor_type) +{ + VkDescriptorImageInfo info = *_info; + + struct kk_descriptor_set_binding_layout *binding_layout = + &set->layout->binding[binding]; + if (descriptor_type == VK_DESCRIPTOR_TYPE_SAMPLER || + descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { + if (binding_layout->immutable_samplers != NULL) { + info.sampler = + kk_sampler_to_handle(binding_layout->immutable_samplers[elem]); + } + } + + uint32_t dst_size; + void *dst = desc_ubo_data(set, binding, elem, &dst_size); + get_sampled_image_view_desc( + descriptor_type, &info, dst, dst_size, + descriptor_type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT); +} + +static void +get_storage_image_view_desc( + struct kk_descriptor_set_binding_layout *binding_layout, + const VkDescriptorImageInfo *const info, void *dst, size_t dst_size) +{ + struct kk_storage_image_descriptor desc = {}; + + if (info && info->imageView != VK_NULL_HANDLE) { + VK_FROM_HANDLE(kk_image_view, view, info->imageView); + + /* Storage images are always single plane */ + assert(view->plane_count == 1); + uint8_t plane = 0; + + assert(view->planes[plane].storage_gpu_resource_id); + desc.image_gpu_resource_id = view->planes[plane].storage_gpu_resource_id; + } + + assert(sizeof(desc) <= dst_size); + memcpy(dst, &desc, sizeof(desc)); +} + +static void +write_storage_image_view_desc(struct kk_descriptor_set *set, + const VkDescriptorImageInfo *const info, + uint32_t binding, uint32_t elem) +{ + uint32_t dst_size; + void *dst = desc_ubo_data(set, binding, elem, &dst_size); + struct kk_descriptor_set_binding_layout *binding_layout = + &set->layout->binding[binding]; + get_storage_image_view_desc(binding_layout, info, dst, dst_size); +} + +static void +write_buffer_desc(struct kk_descriptor_set *set, + const VkDescriptorBufferInfo *const info, uint32_t binding, + uint32_t elem) +{ + VK_FROM_HANDLE(kk_buffer, buffer, info->buffer); + + const struct kk_addr_range addr_range = + kk_buffer_addr_range(buffer, info->offset, info->range); + assert(addr_range.range <= UINT32_MAX); + + const struct kk_buffer_address desc = { + .base_addr = addr_range.addr, + .size = addr_range.range, + }; + write_desc(set, binding, elem, &desc, sizeof(desc)); +} + +static void +write_dynamic_buffer_desc(struct kk_descriptor_set *set, + const VkDescriptorBufferInfo *const info, + uint32_t binding, uint32_t elem) +{ + VK_FROM_HANDLE(kk_buffer, buffer, info->buffer); + const struct kk_descriptor_set_binding_layout *binding_layout = + &set->layout->binding[binding]; + + const struct kk_addr_range addr_range = + kk_buffer_addr_range(buffer, info->offset, info->range); + assert(addr_range.range <= UINT32_MAX); + + struct kk_buffer_address *desc = + &set->dynamic_buffers[binding_layout->dynamic_buffer_index + elem]; + *desc = (struct kk_buffer_address){ + .base_addr = addr_range.addr, + .size = addr_range.range, + }; +} + +static void 
+write_buffer_view_desc(struct kk_descriptor_set *set, + const VkBufferView bufferView, uint32_t binding, + uint32_t elem) +{ + struct kk_storage_image_descriptor desc = {}; + if (bufferView != VK_NULL_HANDLE) { + VK_FROM_HANDLE(kk_buffer_view, view, bufferView); + + assert(view->mtl_texel_buffer_handle); + assert(view->texel_buffer_gpu_id); + + desc.image_gpu_resource_id = view->texel_buffer_gpu_id; + } + write_desc(set, binding, elem, &desc, sizeof(desc)); +} + +static void +write_inline_uniform_data(struct kk_descriptor_set *set, + const VkWriteDescriptorSetInlineUniformBlock *info, + uint32_t binding, uint32_t offset) +{ + assert(set->layout->binding[binding].stride == 1); + write_desc(set, binding, offset, info->pData, info->dataSize); +} + +VKAPI_ATTR void VKAPI_CALL +kk_UpdateDescriptorSets(VkDevice device, uint32_t descriptorWriteCount, + const VkWriteDescriptorSet *pDescriptorWrites, + uint32_t descriptorCopyCount, + const VkCopyDescriptorSet *pDescriptorCopies) +{ + for (uint32_t w = 0; w < descriptorWriteCount; w++) { + const VkWriteDescriptorSet *write = &pDescriptorWrites[w]; + VK_FROM_HANDLE(kk_descriptor_set, set, write->dstSet); + + switch (write->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + write_sampled_image_view_desc( + set, write->pImageInfo + j, write->dstBinding, + write->dstArrayElement + j, write->descriptorType); + } + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + write_storage_image_view_desc(set, write->pImageInfo + j, + write->dstBinding, + write->dstArrayElement + j); + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + write_buffer_view_desc(set, write->pTexelBufferView[j], + write->dstBinding, + write->dstArrayElement + j); + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + write_buffer_desc(set, write->pBufferInfo + j, write->dstBinding, + write->dstArrayElement + j); + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + write_dynamic_buffer_desc(set, write->pBufferInfo + j, + write->dstBinding, + write->dstArrayElement + j); + } + break; + + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: { + const VkWriteDescriptorSetInlineUniformBlock *write_inline = + vk_find_struct_const(write->pNext, + WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK); + assert(write_inline->dataSize == write->descriptorCount); + write_inline_uniform_data(set, write_inline, write->dstBinding, + write->dstArrayElement); + break; + } + + default: + break; + } + } + + for (uint32_t i = 0; i < descriptorCopyCount; i++) { + const VkCopyDescriptorSet *copy = &pDescriptorCopies[i]; + VK_FROM_HANDLE(kk_descriptor_set, src, copy->srcSet); + VK_FROM_HANDLE(kk_descriptor_set, dst, copy->dstSet); + + const struct kk_descriptor_set_binding_layout *src_binding_layout = + &src->layout->binding[copy->srcBinding]; + const struct kk_descriptor_set_binding_layout *dst_binding_layout = + &dst->layout->binding[copy->dstBinding]; + + if (dst_binding_layout->stride > 0 && src_binding_layout->stride > 0) { + for 
(uint32_t j = 0; j < copy->descriptorCount; j++) { + ASSERTED uint32_t dst_max_size, src_max_size; + void *dst_map = desc_ubo_data( + dst, copy->dstBinding, copy->dstArrayElement + j, &dst_max_size); + const void *src_map = desc_ubo_data( + src, copy->srcBinding, copy->srcArrayElement + j, &src_max_size); + const uint32_t copy_size = + MIN2(dst_binding_layout->stride, src_binding_layout->stride); + assert(copy_size <= dst_max_size && copy_size <= src_max_size); + memcpy(dst_map, src_map, copy_size); + } + } + + switch (src_binding_layout->type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { + const uint32_t dst_dyn_start = + dst_binding_layout->dynamic_buffer_index + copy->dstArrayElement; + const uint32_t src_dyn_start = + src_binding_layout->dynamic_buffer_index + copy->srcArrayElement; + typed_memcpy(&dst->dynamic_buffers[dst_dyn_start], + &src->dynamic_buffers[src_dyn_start], + copy->descriptorCount); + break; + } + default: + break; + } + } +} + +void +kk_push_descriptor_set_update(struct kk_push_descriptor_set *push_set, + uint32_t write_count, + const VkWriteDescriptorSet *writes) +{ + struct kk_descriptor_set_layout *layout = push_set->layout; + assert(layout->non_variable_descriptor_buffer_size < sizeof(push_set->data)); + struct kk_descriptor_set set = { + .layout = push_set->layout, + .size = sizeof(push_set->data), + .mapped_ptr = push_set->data, + }; + + for (uint32_t w = 0; w < write_count; w++) { + const VkWriteDescriptorSet *write = &writes[w]; + assert(write->dstSet == VK_NULL_HANDLE); + + switch (write->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + write_sampled_image_view_desc( + &set, write->pImageInfo + j, write->dstBinding, + write->dstArrayElement + j, write->descriptorType); + } + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + write_storage_image_view_desc(&set, write->pImageInfo + j, + write->dstBinding, + write->dstArrayElement + j); + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + write_buffer_view_desc(&set, write->pTexelBufferView[j], + write->dstBinding, + write->dstArrayElement + j); + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + for (uint32_t j = 0; j < write->descriptorCount; j++) { + write_buffer_desc(&set, write->pBufferInfo + j, write->dstBinding, + write->dstArrayElement + j); + } + break; + + default: + break; + } + } +} + +static void kk_descriptor_pool_free(struct kk_descriptor_pool *pool, + uint64_t addr, uint64_t size); + +static void +kk_descriptor_set_destroy(struct kk_device *dev, + struct kk_descriptor_pool *pool, + struct kk_descriptor_set *set) +{ + list_del(&set->link); + if (set->size > 0) + kk_descriptor_pool_free(pool, set->addr, set->size); + vk_descriptor_set_layout_unref(&dev->vk, &set->layout->vk); + + vk_object_free(&dev->vk, NULL, set); +} + +static void +kk_destroy_descriptor_pool(struct kk_device *dev, + const VkAllocationCallbacks *pAllocator, + struct kk_descriptor_pool *pool) +{ + list_for_each_entry_safe(struct kk_descriptor_set, set, &pool->sets, link) + kk_descriptor_set_destroy(dev, pool, set); + + util_vma_heap_finish(&pool->heap); + 
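+   /* All sets were already destroyed above, so nothing sub-allocates from
+    * the backing BO anymore and it can be released. */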
+ if (pool->bo != NULL) + kk_destroy_bo(dev, pool->bo); + + vk_object_free(&dev->vk, pAllocator, pool); +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_CreateDescriptorPool(VkDevice _device, + const VkDescriptorPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorPool *pDescriptorPool) +{ + VK_FROM_HANDLE(kk_device, dev, _device); + struct kk_descriptor_pool *pool; + VkResult result = VK_SUCCESS; + + pool = vk_object_zalloc(&dev->vk, pAllocator, sizeof(*pool), + VK_OBJECT_TYPE_DESCRIPTOR_POOL); + if (!pool) + return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + + list_inithead(&pool->sets); + + const VkMutableDescriptorTypeCreateInfoEXT *mutable_info = + vk_find_struct_const(pCreateInfo->pNext, + MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT); + + uint32_t max_align = 0; + for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) { + const VkMutableDescriptorTypeListEXT *type_list = NULL; + if (pCreateInfo->pPoolSizes[i].type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT && + mutable_info && i < mutable_info->mutableDescriptorTypeListCount) + type_list = &mutable_info->pMutableDescriptorTypeLists[i]; + + uint32_t stride, alignment; + kk_descriptor_stride_align_for_type(pCreateInfo->pPoolSizes[i].type, + type_list, &stride, &alignment); + max_align = MAX2(max_align, alignment); + } + + uint64_t mem_size = 0; + for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) { + const VkMutableDescriptorTypeListEXT *type_list = NULL; + if (pCreateInfo->pPoolSizes[i].type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT && + mutable_info && i < mutable_info->mutableDescriptorTypeListCount) + type_list = &mutable_info->pMutableDescriptorTypeLists[i]; + + uint32_t stride, alignment; + kk_descriptor_stride_align_for_type(pCreateInfo->pPoolSizes[i].type, + type_list, &stride, &alignment); + mem_size += + MAX2(stride, max_align) * pCreateInfo->pPoolSizes[i].descriptorCount; + } + + /* Individual descriptor sets are aligned to the min UBO alignment to + * ensure that we don't end up with unaligned data access in any shaders. + * This means that each descriptor buffer allocated may burn up to 16B of + * extra space to get the right alignment. (Technically, it's at most 28B + * because we're always going to start at least 4B aligned but we're being + * conservative here.) Allocate enough extra space that we can chop it + * into maxSets pieces and align each one of them to 32B. + */ + mem_size += kk_min_cbuf_alignment() * pCreateInfo->maxSets; + + if (mem_size) { + result = kk_alloc_bo(dev, &dev->vk.base, mem_size, 0u, &pool->bo); + if (result != VK_SUCCESS) { + kk_destroy_descriptor_pool(dev, pAllocator, pool); + return result; + } + + /* The BO may be larger thanks to GPU page alignment. We may as well + * make that extra space available to the client. 
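+       * (This is also why the VMA heap below is seeded with pool->bo->size_B
+       * rather than mem_size.)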
+ */ + assert(pool->bo->size_B >= mem_size); + util_vma_heap_init(&pool->heap, pool->bo->gpu, pool->bo->size_B); + } else { + util_vma_heap_init(&pool->heap, 0, 0); + } + + *pDescriptorPool = kk_descriptor_pool_to_handle(pool); + return result; +} + +static VkResult +kk_descriptor_pool_alloc(struct kk_descriptor_pool *pool, uint64_t size, + uint64_t alignment, uint64_t *addr_out, void **map_out) +{ + assert(size > 0); + assert(size % alignment == 0); + + if (size > pool->heap.free_size) + return VK_ERROR_OUT_OF_POOL_MEMORY; + + uint64_t addr = util_vma_heap_alloc(&pool->heap, size, alignment); + if (addr == 0) + return VK_ERROR_FRAGMENTED_POOL; + + assert(addr >= pool->bo->gpu); + assert(addr + size <= pool->bo->gpu + pool->bo->size_B); + uint64_t offset = addr - pool->bo->gpu; + + *addr_out = addr; + *map_out = pool->bo->cpu + offset; + + return VK_SUCCESS; +} + +static void +kk_descriptor_pool_free(struct kk_descriptor_pool *pool, uint64_t addr, + uint64_t size) +{ + assert(size > 0); + assert(addr >= pool->bo->gpu); + assert(addr + size <= pool->bo->gpu + pool->bo->size_B); + util_vma_heap_free(&pool->heap, addr, size); +} + +static VkResult +kk_descriptor_set_create(struct kk_device *dev, struct kk_descriptor_pool *pool, + struct kk_descriptor_set_layout *layout, + uint32_t variable_count, + struct kk_descriptor_set **out_set) +{ + struct kk_descriptor_set *set; + VkResult result = VK_SUCCESS; + + uint32_t mem_size = + sizeof(struct kk_descriptor_set) + + layout->dynamic_buffer_count * sizeof(struct kk_buffer_address); + set = + vk_object_zalloc(&dev->vk, NULL, mem_size, VK_OBJECT_TYPE_DESCRIPTOR_SET); + if (!set) + return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + + set->size = layout->non_variable_descriptor_buffer_size; + + if (layout->binding_count > 0 && + (layout->binding[layout->binding_count - 1].flags & + VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) { + uint32_t stride = layout->binding[layout->binding_count - 1].stride; + set->size += stride * variable_count; + } + + uint32_t alignment = kk_min_cbuf_alignment(); + set->size = align64(set->size, alignment); + + if (set->size > 0) { + result = kk_descriptor_pool_alloc(pool, set->size, alignment, &set->addr, + &set->mapped_ptr); + if (result != VK_SUCCESS) { + vk_object_free(&dev->vk, NULL, set); + return result; + } + set->mtl_descriptor_buffer = pool->bo->map; + } + + vk_descriptor_set_layout_ref(&layout->vk); + set->layout = layout; + + for (uint32_t b = 0; b < layout->binding_count; b++) { + if (layout->binding[b].type != VK_DESCRIPTOR_TYPE_SAMPLER && + layout->binding[b].type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + continue; + + if (layout->binding[b].immutable_samplers == NULL) + continue; + + uint32_t array_size = layout->binding[b].array_size; + if (layout->binding[b].flags & + VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT) + array_size = variable_count; + + const VkDescriptorImageInfo empty = {}; + for (uint32_t j = 0; j < array_size; j++) { + write_sampled_image_view_desc(set, &empty, b, j, + layout->binding[b].type); + } + } + + list_addtail(&set->link, &pool->sets); + *out_set = set; + + return result; +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_AllocateDescriptorSets(VkDevice device, + const VkDescriptorSetAllocateInfo *pAllocateInfo, + VkDescriptorSet *pDescriptorSets) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VK_FROM_HANDLE(kk_descriptor_pool, pool, pAllocateInfo->descriptorPool); + + VkResult result = VK_SUCCESS; + uint32_t i; + + struct kk_descriptor_set *set = NULL; + + const 
VkDescriptorSetVariableDescriptorCountAllocateInfo *var_desc_count = + vk_find_struct_const( + pAllocateInfo->pNext, + DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO); + + /* allocate a set of buffers for each shader to contain descriptors */ + for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { + VK_FROM_HANDLE(kk_descriptor_set_layout, layout, + pAllocateInfo->pSetLayouts[i]); + /* If descriptorSetCount is zero or this structure is not included in + * the pNext chain, then the variable lengths are considered to be zero. + */ + const uint32_t variable_count = + var_desc_count && var_desc_count->descriptorSetCount > 0 + ? var_desc_count->pDescriptorCounts[i] + : 0; + + result = + kk_descriptor_set_create(dev, pool, layout, variable_count, &set); + if (result != VK_SUCCESS) + break; + + pDescriptorSets[i] = kk_descriptor_set_to_handle(set); + } + + if (result != VK_SUCCESS) { + kk_FreeDescriptorSets(device, pAllocateInfo->descriptorPool, i, + pDescriptorSets); + for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) { + pDescriptorSets[i] = VK_NULL_HANDLE; + } + } + return result; +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_FreeDescriptorSets(VkDevice device, VkDescriptorPool descriptorPool, + uint32_t descriptorSetCount, + const VkDescriptorSet *pDescriptorSets) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VK_FROM_HANDLE(kk_descriptor_pool, pool, descriptorPool); + + for (uint32_t i = 0; i < descriptorSetCount; i++) { + VK_FROM_HANDLE(kk_descriptor_set, set, pDescriptorSets[i]); + + if (set) + kk_descriptor_set_destroy(dev, pool, set); + } + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +kk_DestroyDescriptorPool(VkDevice device, VkDescriptorPool _pool, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VK_FROM_HANDLE(kk_descriptor_pool, pool, _pool); + + if (!_pool) + return; + + kk_destroy_descriptor_pool(dev, pAllocator, pool); +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_ResetDescriptorPool(VkDevice device, VkDescriptorPool descriptorPool, + VkDescriptorPoolResetFlags flags) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VK_FROM_HANDLE(kk_descriptor_pool, pool, descriptorPool); + + list_for_each_entry_safe(struct kk_descriptor_set, set, &pool->sets, link) + kk_descriptor_set_destroy(dev, pool, set); + + return VK_SUCCESS; +} + +static void +kk_descriptor_set_write_template( + struct kk_descriptor_set *set, + const struct vk_descriptor_update_template *template, const void *data) +{ + for (uint32_t i = 0; i < template->entry_count; i++) { + const struct vk_descriptor_template_entry *entry = &template->entries[i]; + + switch (entry->type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + for (uint32_t j = 0; j < entry->array_count; j++) { + const VkDescriptorImageInfo *info = + data + entry->offset + j * entry->stride; + + write_sampled_image_view_desc(set, info, entry->binding, + entry->array_element + j, + entry->type); + } + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + for (uint32_t j = 0; j < entry->array_count; j++) { + const VkDescriptorImageInfo *info = + data + entry->offset + j * entry->stride; + + write_storage_image_view_desc(set, info, entry->binding, + entry->array_element + j); + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + for (uint32_t j = 0; j < entry->array_count; j++) { + const VkBufferView *bview = + data + entry->offset 
+ j * entry->stride; + + write_buffer_view_desc(set, *bview, entry->binding, + entry->array_element + j); + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + for (uint32_t j = 0; j < entry->array_count; j++) { + const VkDescriptorBufferInfo *info = + data + entry->offset + j * entry->stride; + + write_buffer_desc(set, info, entry->binding, + entry->array_element + j); + } + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + for (uint32_t j = 0; j < entry->array_count; j++) { + const VkDescriptorBufferInfo *info = + data + entry->offset + j * entry->stride; + + write_dynamic_buffer_desc(set, info, entry->binding, + entry->array_element + j); + } + break; + + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: + write_desc(set, entry->binding, entry->array_element, + data + entry->offset, entry->array_count); + break; + + default: + break; + } + } +} + +VKAPI_ATTR void VKAPI_CALL +kk_UpdateDescriptorSetWithTemplate( + VkDevice device, VkDescriptorSet descriptorSet, + VkDescriptorUpdateTemplate descriptorUpdateTemplate, const void *pData) +{ + VK_FROM_HANDLE(kk_descriptor_set, set, descriptorSet); + VK_FROM_HANDLE(vk_descriptor_update_template, template, + descriptorUpdateTemplate); + + kk_descriptor_set_write_template(set, template, pData); +} + +void +kk_push_descriptor_set_update_template( + struct kk_push_descriptor_set *push_set, + struct kk_descriptor_set_layout *layout, + const struct vk_descriptor_update_template *template, const void *data) +{ + struct kk_descriptor_set tmp_set = { + .layout = layout, + .size = sizeof(push_set->data), + .mapped_ptr = push_set->data, + }; + kk_descriptor_set_write_template(&tmp_set, template, data); +} diff --git a/src/kosmickrisp/vulkan/kk_descriptor_set.h b/src/kosmickrisp/vulkan/kk_descriptor_set.h new file mode 100644 index 00000000000..d1652448a41 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_descriptor_set.h @@ -0,0 +1,81 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_DESCRIPTOR_SET +#define KK_DESCRIPTOR_SET 1 + +#include "kk_private.h" + +#include "kk_descriptor_types.h" +#include "kk_device.h" + +#include "vk_descriptor_update_template.h" +#include "vk_object.h" + +#include "util/list.h" +#include "util/vma.h" + +struct kk_descriptor_set_layout; +struct kk_bo; + +struct kk_descriptor_pool { + struct vk_object_base base; + + struct list_head sets; + + struct kk_bo *bo; + struct util_vma_heap heap; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(kk_descriptor_pool, base, VkDescriptorPool, + VK_OBJECT_TYPE_DESCRIPTOR_POOL) + +struct kk_descriptor_set { + struct vk_object_base base; + + /* Link in kk_descriptor_pool::sets */ + struct list_head link; + + struct kk_descriptor_set_layout *layout; + mtl_resource *mtl_descriptor_buffer; + void *mapped_ptr; + uint64_t addr; + uint32_t size; + + struct kk_buffer_address dynamic_buffers[]; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(kk_descriptor_set, base, VkDescriptorSet, + VK_OBJECT_TYPE_DESCRIPTOR_SET) + +static inline struct kk_buffer_address +kk_descriptor_set_addr(const struct kk_descriptor_set *set) +{ + return (struct kk_buffer_address){ + .base_addr = set->addr, + .size = set->size, + }; +} + +struct kk_push_descriptor_set { + uint8_t data[KK_PUSH_DESCRIPTOR_SET_SIZE]; + struct kk_descriptor_set_layout *layout; + mtl_resource *mtl_descriptor_buffer; + uint32_t resource_count; + mtl_resource *mtl_resources[]; +}; + +void kk_push_descriptor_set_update(struct kk_push_descriptor_set *push_set, + uint32_t write_count, + const VkWriteDescriptorSet *writes); + +void kk_push_descriptor_set_update_template( + struct kk_push_descriptor_set *push_set, + struct kk_descriptor_set_layout *layout, + const struct vk_descriptor_update_template *template, const void *data); + +#endif diff --git a/src/kosmickrisp/vulkan/kk_descriptor_set_layout.c b/src/kosmickrisp/vulkan/kk_descriptor_set_layout.c new file mode 100644 index 00000000000..11b4c98b9f6 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_descriptor_set_layout.c @@ -0,0 +1,496 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_descriptor_set_layout.h" + +#include "kk_descriptor_types.h" +#include "kk_device.h" +#include "kk_entrypoints.h" +#include "kk_physical_device.h" +#include "kk_sampler.h" + +#include "vk_pipeline_layout.h" + +static bool +binding_has_immutable_samplers(const VkDescriptorSetLayoutBinding *binding) +{ + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + return binding->pImmutableSamplers != NULL; + + default: + return false; + } +} + +void +kk_descriptor_stride_align_for_type( + VkDescriptorType type, const VkMutableDescriptorTypeListEXT *type_list, + uint32_t *stride, uint32_t *alignment) +{ + switch (type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + /* TODO: How do samplers work? 
*/ + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + *stride = *alignment = sizeof(struct kk_sampled_image_descriptor); + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + *stride = *alignment = sizeof(struct kk_storage_image_descriptor); + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + *stride = *alignment = sizeof(struct kk_buffer_address); + break; + + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + *stride = *alignment = 0; /* These don't take up buffer space */ + break; + + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: + *stride = 1; /* Array size is bytes */ + *alignment = kk_min_cbuf_alignment(); + break; + + case VK_DESCRIPTOR_TYPE_MUTABLE_EXT: + *stride = *alignment = 0; + if (type_list == NULL) + *stride = *alignment = KK_MAX_DESCRIPTOR_SIZE; + for (unsigned i = 0; type_list && i < type_list->descriptorTypeCount; + i++) { + /* This shouldn't recurse */ + assert(type_list->pDescriptorTypes[i] != + VK_DESCRIPTOR_TYPE_MUTABLE_EXT); + uint32_t desc_stride, desc_align; + kk_descriptor_stride_align_for_type(type_list->pDescriptorTypes[i], + NULL, &desc_stride, &desc_align); + *stride = MAX2(*stride, desc_stride); + *alignment = MAX2(*alignment, desc_align); + } + *stride = ALIGN(*stride, *alignment); + break; + + default: + UNREACHABLE("Invalid descriptor type"); + } + + assert(*stride <= KK_MAX_DESCRIPTOR_SIZE); +} + +static const VkMutableDescriptorTypeListEXT * +kk_descriptor_get_type_list(VkDescriptorType type, + const VkMutableDescriptorTypeCreateInfoEXT *info, + const uint32_t info_idx) +{ + const VkMutableDescriptorTypeListEXT *type_list = NULL; + if (type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) { + assert(info != NULL); + assert(info_idx < info->mutableDescriptorTypeListCount); + type_list = &info->pMutableDescriptorTypeLists[info_idx]; + } + return type_list; +} + +static void +kk_descriptor_set_layout_destroy(struct vk_device *vk_dev, + struct vk_descriptor_set_layout *vk_layout) +{ + struct kk_device *dev = container_of(vk_dev, struct kk_device, vk); + struct kk_descriptor_set_layout *layout = + vk_to_kk_descriptor_set_layout(vk_layout); + + vk_object_free(&dev->vk, NULL, layout); +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_CreateDescriptorSetLayout(VkDevice device, + const VkDescriptorSetLayoutCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkDescriptorSetLayout *pSetLayout) +{ + VK_FROM_HANDLE(kk_device, dev, device); + + uint32_t num_bindings = 0; + uint32_t immutable_sampler_count = 0; + for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { + const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j]; + num_bindings = MAX2(num_bindings, binding->binding + 1); + + /* From the Vulkan 1.1.97 spec for VkDescriptorSetLayoutBinding: + * + * "If descriptorType specifies a VK_DESCRIPTOR_TYPE_SAMPLER or + * VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER type descriptor, then + * pImmutableSamplers can be used to initialize a set of immutable + * samplers. [...] If descriptorType is not one of these descriptor + * types, then pImmutableSamplers is ignored. + * + * We need to be careful here and only parse pImmutableSamplers if we + * have one of the right descriptor types. 
+ */ + if (binding_has_immutable_samplers(binding)) + immutable_sampler_count += binding->descriptorCount; + } + + VK_MULTIALLOC(ma); + VK_MULTIALLOC_DECL(&ma, struct kk_descriptor_set_layout, layout, 1); + VK_MULTIALLOC_DECL(&ma, struct kk_descriptor_set_binding_layout, bindings, + num_bindings); + VK_MULTIALLOC_DECL(&ma, struct kk_sampler *, samplers, + immutable_sampler_count); + + if (!vk_descriptor_set_layout_multizalloc(&dev->vk, &ma, pCreateInfo)) + return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + + layout->vk.destroy = kk_descriptor_set_layout_destroy; + layout->flags = pCreateInfo->flags; + layout->binding_count = num_bindings; + + for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) { + const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j]; + uint32_t b = binding->binding; + /* We temporarily store pCreateInfo->pBindings[] index (plus one) in the + * immutable_samplers pointer. This provides us with a quick-and-dirty + * way to sort the bindings by binding number. + */ + layout->binding[b].immutable_samplers = (void *)(uintptr_t)(j + 1); + } + + const VkDescriptorSetLayoutBindingFlagsCreateInfo *binding_flags_info = + vk_find_struct_const(pCreateInfo->pNext, + DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO); + const VkMutableDescriptorTypeCreateInfoEXT *mutable_info = + vk_find_struct_const(pCreateInfo->pNext, + MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT); + + uint32_t buffer_size = 0; + uint32_t max_variable_descriptor_size = 0; + uint8_t dynamic_buffer_count = 0; + uint32_t total_descriptor_count = 0u; + for (uint32_t b = 0; b < num_bindings; b++) { + /* We stashed the pCreateInfo->pBindings[] index (plus one) in the + * immutable_samplers pointer. Check for NULL (empty binding) and then + * reset it and compute the index. 
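+       * (The plus-one bias keeps pBindings[] index 0 distinguishable from a
+       * binding that was never written and thus stays NULL after the zalloc.)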
+ */ + if (layout->binding[b].immutable_samplers == NULL) + continue; + const uint32_t info_idx = + (uintptr_t)(void *)layout->binding[b].immutable_samplers - 1; + layout->binding[b].immutable_samplers = NULL; + + const VkDescriptorSetLayoutBinding *binding = + &pCreateInfo->pBindings[info_idx]; + + if (binding->descriptorCount == 0) + continue; + + layout->binding[b].type = binding->descriptorType; + layout->binding[b].mtl_resources_index = total_descriptor_count; + layout->descriptor_count += binding->descriptorCount; + + if (binding_flags_info && binding_flags_info->bindingCount > 0) { + assert(binding_flags_info->bindingCount == pCreateInfo->bindingCount); + layout->binding[b].flags = binding_flags_info->pBindingFlags[info_idx]; + } + + layout->binding[b].array_size = binding->descriptorCount; + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + layout->binding[b].dynamic_buffer_index = dynamic_buffer_count; + BITSET_SET_RANGE(layout->dynamic_ubos, dynamic_buffer_count, + dynamic_buffer_count + binding->descriptorCount - 1); + dynamic_buffer_count += binding->descriptorCount; + break; + + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + layout->binding[b].dynamic_buffer_index = dynamic_buffer_count; + dynamic_buffer_count += binding->descriptorCount; + break; + + default: + break; + } + + const VkMutableDescriptorTypeListEXT *type_list = + kk_descriptor_get_type_list(binding->descriptorType, mutable_info, + info_idx); + + uint32_t stride, alignment; + kk_descriptor_stride_align_for_type(binding->descriptorType, type_list, + &stride, &alignment); + + uint8_t max_plane_count = 1; + + if (binding_has_immutable_samplers(binding)) { + layout->binding[b].immutable_samplers = samplers; + samplers += binding->descriptorCount; + for (uint32_t i = 0; i < binding->descriptorCount; i++) { + VK_FROM_HANDLE(kk_sampler, sampler, binding->pImmutableSamplers[i]); + layout->binding[b].immutable_samplers[i] = sampler; + const uint8_t sampler_plane_count = + sampler->vk.ycbcr_conversion + ? vk_format_get_plane_count( + sampler->vk.ycbcr_conversion->state.format) + : 1; + if (max_plane_count < sampler_plane_count) + max_plane_count = sampler_plane_count; + } + } + + stride *= max_plane_count; + layout->binding[b].count_per_element = max_plane_count; + total_descriptor_count += max_plane_count * binding->descriptorCount; + + if (stride > 0) { + assert(stride <= UINT8_MAX); + assert(util_is_power_of_two_nonzero(alignment)); + + buffer_size = align64(buffer_size, alignment); + layout->binding[b].offset = buffer_size; + layout->binding[b].stride = stride; + + if (layout->binding[b].flags & + VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT) { + /* From the Vulkan 1.3.256 spec: + * + * VUID-VkDescriptorSetLayoutBindingFlagsCreateInfo-pBindingFlags-03004 + * "If an element of pBindingFlags includes + * VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT, then + * all other elements of + * VkDescriptorSetLayoutCreateInfo::pBindings must have a + * smaller value of binding" + * + * In other words, it has to be the last binding. 
+ */ + assert(b == num_bindings - 1); + assert(max_variable_descriptor_size == 0); + max_variable_descriptor_size = stride * binding->descriptorCount; + } else { + /* the allocation size will be computed at descriptor allocation, + * but the buffer size will be already aligned as this binding will + * be the last + */ + buffer_size += stride * binding->descriptorCount; + } + } + } + + layout->non_variable_descriptor_buffer_size = buffer_size; + layout->max_buffer_size = buffer_size + max_variable_descriptor_size; + layout->dynamic_buffer_count = dynamic_buffer_count; + + struct mesa_blake3 blake3_ctx; + _mesa_blake3_init(&blake3_ctx); + +#define BLAKE3_UPDATE_VALUE(x) \ + _mesa_blake3_update(&blake3_ctx, &(x), sizeof(x)); + BLAKE3_UPDATE_VALUE(layout->non_variable_descriptor_buffer_size); + BLAKE3_UPDATE_VALUE(layout->dynamic_buffer_count); + BLAKE3_UPDATE_VALUE(layout->binding_count); + + for (uint32_t b = 0; b < num_bindings; b++) { + BLAKE3_UPDATE_VALUE(layout->binding[b].type); + BLAKE3_UPDATE_VALUE(layout->binding[b].flags); + BLAKE3_UPDATE_VALUE(layout->binding[b].array_size); + BLAKE3_UPDATE_VALUE(layout->binding[b].offset); + BLAKE3_UPDATE_VALUE(layout->binding[b].stride); + BLAKE3_UPDATE_VALUE(layout->binding[b].dynamic_buffer_index); + + if (layout->binding[b].immutable_samplers != NULL) { + for (uint32_t i = 0; i < layout->binding[b].array_size; i++) { + const struct kk_sampler *sampler = + layout->binding[b].immutable_samplers[i]; + + /* We zalloc the object, so it's safe to hash the whole thing */ + if (sampler != NULL && sampler->vk.ycbcr_conversion != NULL) + BLAKE3_UPDATE_VALUE(sampler->vk.ycbcr_conversion->state); + } + } + } +#undef BLAKE3_UPDATE_VALUE + + _mesa_blake3_final(&blake3_ctx, layout->vk.blake3); + + if (pCreateInfo->flags & + VK_DESCRIPTOR_SET_LAYOUT_CREATE_EMBEDDED_IMMUTABLE_SAMPLERS_BIT_EXT) { + void *sampler_desc_data = + vk_alloc2(&dev->vk.alloc, pAllocator, buffer_size, 4, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (sampler_desc_data == NULL) { + kk_descriptor_set_layout_destroy(&dev->vk, &layout->vk); + return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + for (uint32_t b = 0; b < num_bindings; b++) { + assert(layout->binding[b].type == VK_DESCRIPTOR_TYPE_SAMPLER); + assert(layout->binding[b].array_size == 1); + assert(layout->binding[b].immutable_samplers != NULL); + assert(!(layout->binding[b].flags & + VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)); + + /* I'm paranoid */ + if (layout->binding[b].immutable_samplers == NULL) + continue; + + struct kk_sampler *sampler = layout->binding[b].immutable_samplers[0]; + + /* YCbCr has to come in through a combined image/sampler */ + assert(sampler->plane_count == 1); + + assert(sampler->planes[0].hw->handle); + } + + vk_free2(&dev->vk.alloc, pAllocator, sampler_desc_data); + } + + *pSetLayout = kk_descriptor_set_layout_to_handle(layout); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +kk_GetDescriptorSetLayoutSupport( + VkDevice device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo, + VkDescriptorSetLayoutSupport *pSupport) +{ + const VkMutableDescriptorTypeCreateInfoEXT *mutable_info = + vk_find_struct_const(pCreateInfo->pNext, + MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_EXT); + const VkDescriptorSetLayoutBindingFlagsCreateInfo *binding_flags = + vk_find_struct_const(pCreateInfo->pNext, + DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO); + + /* Figure out the maximum alignment up-front. 
Otherwise, we need to sort + * the list of descriptors by binding number in order to get the size + * accumulation right. + */ + uint32_t max_align = 0; + for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) { + const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[i]; + const VkMutableDescriptorTypeListEXT *type_list = + kk_descriptor_get_type_list(binding->descriptorType, mutable_info, i); + + uint32_t stride, alignment; + kk_descriptor_stride_align_for_type(binding->descriptorType, type_list, + &stride, &alignment); + max_align = MAX2(max_align, alignment); + } + + uint64_t non_variable_size = 0; + uint32_t variable_stride = 0; + uint32_t variable_count = 0; + uint8_t dynamic_buffer_count = 0; + + for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) { + const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[i]; + + VkDescriptorBindingFlags flags = 0; + if (binding_flags != NULL && binding_flags->bindingCount > 0) + flags = binding_flags->pBindingFlags[i]; + + switch (binding->descriptorType) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + dynamic_buffer_count += binding->descriptorCount; + break; + default: + break; + } + + const VkMutableDescriptorTypeListEXT *type_list = + kk_descriptor_get_type_list(binding->descriptorType, mutable_info, i); + + uint32_t stride, alignment; + kk_descriptor_stride_align_for_type(binding->descriptorType, type_list, + &stride, &alignment); + + if (stride > 0) { + assert(stride <= UINT8_MAX); + assert(util_is_power_of_two_nonzero(alignment)); + + if (flags & VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT) { + /* From the Vulkan 1.3.256 spec: + * + * "For the purposes of this command, a variable-sized + * descriptor binding with a descriptorCount of zero is treated + * as if the descriptorCount is one" + */ + variable_count = MAX2(1, binding->descriptorCount); + variable_stride = stride; + } else { + /* Since we're aligning to the maximum and since this is just a + * check for whether or not the max buffer size is big enough, we + * keep non_variable_size aligned to max_align. 
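+             * (Rounding each binding up to max_align can only overestimate
+             * the real requirement, which is safe for a support query.)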
+ */ + non_variable_size += stride * binding->descriptorCount; + non_variable_size = align64(non_variable_size, max_align); + } + } + } + + uint64_t buffer_size = non_variable_size; + if (variable_stride > 0) { + buffer_size += variable_stride * variable_count; + buffer_size = align64(buffer_size, max_align); + } + + uint32_t max_buffer_size; + if (pCreateInfo->flags & + VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR) + max_buffer_size = KK_PUSH_DESCRIPTOR_SET_SIZE; + else + max_buffer_size = KK_MAX_DESCRIPTOR_SET_SIZE; + + pSupport->supported = dynamic_buffer_count <= KK_MAX_DYNAMIC_BUFFERS && + buffer_size <= max_buffer_size; + + vk_foreach_struct(ext, pSupport->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT: { + VkDescriptorSetVariableDescriptorCountLayoutSupport *vs = (void *)ext; + if (variable_stride > 0) { + vs->maxVariableDescriptorCount = + (max_buffer_size - non_variable_size) / variable_stride; + } else { + vs->maxVariableDescriptorCount = 0; + } + break; + } + + default: + vk_debug_ignored_stype(ext->sType); + break; + } + } +} + +VKAPI_ATTR void VKAPI_CALL +kk_GetDescriptorSetLayoutSizeEXT(VkDevice device, VkDescriptorSetLayout _layout, + VkDeviceSize *pLayoutSizeInBytes) +{ + VK_FROM_HANDLE(kk_descriptor_set_layout, layout, _layout); + + *pLayoutSizeInBytes = layout->max_buffer_size; +} + +VKAPI_ATTR void VKAPI_CALL +kk_GetDescriptorSetLayoutBindingOffsetEXT(VkDevice device, + VkDescriptorSetLayout _layout, + uint32_t binding, + VkDeviceSize *pOffset) +{ + VK_FROM_HANDLE(kk_descriptor_set_layout, layout, _layout); + + *pOffset = layout->binding[binding].offset; +} diff --git a/src/kosmickrisp/vulkan/kk_descriptor_set_layout.h b/src/kosmickrisp/vulkan/kk_descriptor_set_layout.h new file mode 100644 index 00000000000..14ecda0f62a --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_descriptor_set_layout.h @@ -0,0 +1,103 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_DESCRIPTOR_SET_LAYOUT +#define KK_DESCRIPTOR_SET_LAYOUT 1 + +#include "kk_private.h" + +#include "vk_descriptor_set_layout.h" +#include "vk_object.h" + +#include "util/bitset.h" + +struct kk_device; +struct kk_physical_device; +struct kk_sampler; +struct vk_pipeline_layout; + +struct kk_descriptor_set_binding_layout { + /* The type of the descriptors in this binding */ + VkDescriptorType type; + + /* Flags provided when this binding was created */ + VkDescriptorBindingFlags flags; + + /* Number of array elements in this binding (or size in bytes for inline + * uniform data) + */ + uint32_t array_size; + + /* Number of actual descriptors per element */ + uint32_t count_per_element; + + /* Offset into the descriptor buffer where this descriptor lives */ + uint32_t offset; + + /* Offset to the mtl_resource_ids array where this descriptor stores them */ + uint32_t mtl_resources_index; + + /* Stride between array elements in the descriptor buffer */ + uint8_t stride; + + /* Index into the dynamic buffer binding array */ + uint8_t dynamic_buffer_index; + + /* Immutable samplers (or NULL if no immutable samplers) */ + struct kk_sampler **immutable_samplers; +}; + +struct kk_descriptor_set_layout { + struct vk_descriptor_set_layout vk; + + VkDescriptorSetLayoutCreateFlagBits flags; + + /* Size of the descriptor buffer for this descriptor set */ + /* Does not contain the size needed for variable count descriptors */ + uint32_t non_variable_descriptor_buffer_size; + + /* Maximum possible buffer size for this descriptor set */ + uint32_t max_buffer_size; + + /* Number of dynamic UBO bindings in this set */ + uint8_t dynamic_buffer_count; + + /* Which dynamic buffers are UBOs */ + BITSET_DECLARE(dynamic_ubos, KK_MAX_DYNAMIC_BUFFERS); + + /* Number of bindings in this descriptor set */ + uint32_t binding_count; + + /* Number of descriptors in the layout */ + uint32_t descriptor_count; + + /* Address to the embedded sampler descriptor buffer. + * + * This is allocated from nvk_device::heap and has the size + * non_variable_descriptor_buffer_size. + */ + uint64_t embedded_samplers_addr; + + /* Bindings in this descriptor set */ + struct kk_descriptor_set_binding_layout binding[0]; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(kk_descriptor_set_layout, vk.base, + VkDescriptorSetLayout, + VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT) + +void kk_descriptor_stride_align_for_type( + VkDescriptorType type, const VkMutableDescriptorTypeListEXT *type_list, + uint32_t *stride, uint32_t *alignment); + +static inline struct kk_descriptor_set_layout * +vk_to_kk_descriptor_set_layout(struct vk_descriptor_set_layout *layout) +{ + return container_of(layout, struct kk_descriptor_set_layout, vk); +} + +#endif /* KK_DESCRIPTOR_SET_LAYOUT */ diff --git a/src/kosmickrisp/vulkan/kk_descriptor_types.h b/src/kosmickrisp/vulkan/kk_descriptor_types.h new file mode 100644 index 00000000000..5296366c8c6 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_descriptor_types.h @@ -0,0 +1,45 @@ +/* + * Copyright © 2024 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ +#ifndef KK_DESCRIPTOR_TYPES +#define KK_DESCRIPTOR_TYPES 1 + +#include "kk_private.h" + +/* TODO_KOSMICKRISP Reduce size to 32 bytes by moving border to a heap. 
*/ +struct kk_sampled_image_descriptor { + uint64_t image_gpu_resource_id; + uint16_t sampler_index; + uint16_t lod_bias_fp16; + uint16_t lod_min_fp16; + uint16_t lod_max_fp16; + uint32_t has_border; + uint32_t pad_to_64_bits; + uint32_t border[4]; + uint64_t pad_to_power_2[3]; +}; + +static_assert(sizeof(struct kk_sampled_image_descriptor) == 64, + "kk_sampled_image_descriptor has no holes"); + +struct kk_storage_image_descriptor { + uint64_t image_gpu_resource_id; +}; + +static_assert(sizeof(struct kk_storage_image_descriptor) == 8, + "kk_storage_image_descriptor has no holes"); + +/* This has to match nir_address_format_64bit_bounded_global */ +struct kk_buffer_address { + uint64_t base_addr; + uint32_t size; + uint32_t zero; /* Must be zero! */ +}; + +static_assert(sizeof(struct kk_buffer_address) == 16, + "kk_buffer_address has no holes"); + +#endif /* KK_DESCRIPTOR_TYPES */ diff --git a/src/kosmickrisp/vulkan/kk_device.c b/src/kosmickrisp/vulkan/kk_device.c new file mode 100644 index 00000000000..0581b7d4bda --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_device.c @@ -0,0 +1,348 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_device.h" + +#include "kk_cmd_buffer.h" +#include "kk_entrypoints.h" +#include "kk_instance.h" +#include "kk_physical_device.h" +#include "kk_shader.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" + +#include "vk_cmd_enqueue_entrypoints.h" +#include "vk_common_entrypoints.h" + +#include "vulkan/wsi/wsi_common.h" +#include "vk_pipeline_cache.h" + +#include <time.h> + +DERIVE_HASH_TABLE(mtl_sampler_packed); + +static VkResult +kk_init_sampler_heap(struct kk_device *dev, struct kk_sampler_heap *h) +{ + h->ht = mtl_sampler_packed_table_create(NULL); + if (!h->ht) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + VkResult result = kk_query_table_init(dev, &h->table, 1024); + + if (result != VK_SUCCESS) { + ralloc_free(h->ht); + return result; + } + + simple_mtx_init(&h->lock, mtx_plain); + return VK_SUCCESS; +} + +static void +kk_destroy_sampler_heap(struct kk_device *dev, struct kk_sampler_heap *h) +{ + struct hash_entry *entry = _mesa_hash_table_next_entry(h->ht, NULL); + while (entry) { + struct kk_rc_sampler *sampler = (struct kk_rc_sampler *)entry->data; + mtl_release(sampler->handle); + entry = _mesa_hash_table_next_entry(h->ht, entry); + } + kk_query_table_finish(dev, &h->table); + ralloc_free(h->ht); + simple_mtx_destroy(&h->lock); +} + +static VkResult +kk_sampler_heap_add_locked(struct kk_device *dev, struct kk_sampler_heap *h, + struct mtl_sampler_packed desc, + struct kk_rc_sampler **out) +{ + struct hash_entry *ent = _mesa_hash_table_search(h->ht, &desc); + if (ent != NULL) { + *out = ent->data; + + assert((*out)->refcount != 0); + (*out)->refcount++; + + return VK_SUCCESS; + } + + struct kk_rc_sampler *rc = ralloc(h->ht, struct kk_rc_sampler); + if (!rc) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + mtl_sampler *handle = kk_sampler_create(dev, &desc); + uint64_t gpu_id = mtl_sampler_get_gpu_resource_id(handle); + + uint32_t index; + VkResult result = kk_query_table_add(dev, &h->table, gpu_id, &index); + if (result != VK_SUCCESS) { + mtl_release(handle); + ralloc_free(rc); + return result; + } + + *rc = (struct kk_rc_sampler){ + .key = desc, + .handle = handle, + .refcount = 1, + .index = index, + }; + + _mesa_hash_table_insert(h->ht, &rc->key, rc); + *out = rc; + + return VK_SUCCESS; +} + +VkResult +kk_sampler_heap_add(struct kk_device 
*dev, struct mtl_sampler_packed desc, + struct kk_rc_sampler **out) +{ + struct kk_sampler_heap *h = &dev->samplers; + + simple_mtx_lock(&h->lock); + VkResult result = kk_sampler_heap_add_locked(dev, h, desc, out); + simple_mtx_unlock(&h->lock); + + return result; +} + +static void +kk_sampler_heap_remove_locked(struct kk_device *dev, struct kk_sampler_heap *h, + struct kk_rc_sampler *rc) +{ + assert(rc->refcount != 0); + rc->refcount--; + + if (rc->refcount == 0) { + mtl_release(rc->handle); + kk_query_table_remove(dev, &h->table, rc->index); + _mesa_hash_table_remove_key(h->ht, &rc->key); + ralloc_free(rc); + } +} + +void +kk_sampler_heap_remove(struct kk_device *dev, struct kk_rc_sampler *rc) +{ + struct kk_sampler_heap *h = &dev->samplers; + + simple_mtx_lock(&h->lock); + kk_sampler_heap_remove_locked(dev, h, rc); + simple_mtx_unlock(&h->lock); +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_CreateDevice(VkPhysicalDevice physicalDevice, + const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkDevice *pDevice) +{ + VK_FROM_HANDLE(kk_physical_device, pdev, physicalDevice); + VkResult result = VK_ERROR_OUT_OF_HOST_MEMORY; + struct kk_device *dev; + + dev = vk_zalloc2(&pdev->vk.instance->alloc, pAllocator, sizeof(*dev), 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!dev) + return vk_error(pdev, VK_ERROR_OUT_OF_HOST_MEMORY); + + /* Fill the dispatch table we will expose to the users */ + vk_device_dispatch_table_from_entrypoints( + &dev->exposed_dispatch_table, &vk_cmd_enqueue_device_entrypoints, true); + vk_device_dispatch_table_from_entrypoints(&dev->exposed_dispatch_table, + &kk_device_entrypoints, false); + vk_device_dispatch_table_from_entrypoints(&dev->exposed_dispatch_table, + &wsi_device_entrypoints, false); + vk_device_dispatch_table_from_entrypoints( + &dev->exposed_dispatch_table, &vk_common_device_entrypoints, false); + + struct vk_device_dispatch_table dispatch_table; + vk_device_dispatch_table_from_entrypoints(&dispatch_table, + &kk_device_entrypoints, true); + vk_device_dispatch_table_from_entrypoints( + &dispatch_table, &vk_common_device_entrypoints, false); + vk_device_dispatch_table_from_entrypoints(&dispatch_table, + &wsi_device_entrypoints, false); + + result = vk_device_init(&dev->vk, &pdev->vk, &dispatch_table, pCreateInfo, + pAllocator); + if (result != VK_SUCCESS) + goto fail_alloc; + + dev->vk.shader_ops = &kk_device_shader_ops; + dev->mtl_handle = pdev->mtl_dev_handle; + dev->vk.command_buffer_ops = &kk_cmd_buffer_ops; + dev->vk.command_dispatch_table = &dev->vk.dispatch_table; + + /* Buffer to use as null descriptor */ + result = kk_alloc_bo(dev, &dev->vk.base, sizeof(uint64_t) * 8, 8u, + &dev->null_descriptor); + if (result != VK_SUCCESS) + goto fail_init; + + result = + kk_queue_init(dev, &dev->queue, &pCreateInfo->pQueueCreateInfos[0], 0); + if (result != VK_SUCCESS) + goto fail_vab_memory; + + result = kk_device_init_meta(dev); + if (result != VK_SUCCESS) + goto fail_mem_cache; + + result = kk_query_table_init(dev, &dev->occlusion_queries, + KK_MAX_OCCLUSION_QUERIES); + if (result != VK_SUCCESS) + goto fail_meta; + + result = kk_init_sampler_heap(dev, &dev->samplers); + if (result != VK_SUCCESS) + goto fail_query_table; + + result = kk_device_init_lib(dev); + if (result != VK_SUCCESS) + goto fail_sampler_heap; + + simple_mtx_init(&dev->user_heap_cache.mutex, mtx_plain); + util_dynarray_init(&dev->user_heap_cache.handles, NULL); + + *pDevice = kk_device_to_handle(dev); + + dev->gpu_capture_enabled = 
kk_get_environment_boolean(KK_ENABLE_GPU_CAPTURE); + mtl_start_gpu_capture(dev->mtl_handle); + + return VK_SUCCESS; + +fail_sampler_heap: + kk_destroy_sampler_heap(dev, &dev->samplers); +fail_query_table: + kk_query_table_finish(dev, &dev->occlusion_queries); +fail_meta: + kk_device_finish_meta(dev); +fail_mem_cache: + kk_queue_finish(dev, &dev->queue); +fail_vab_memory: + kk_destroy_bo(dev, dev->null_descriptor); +fail_init: + vk_device_finish(&dev->vk); +fail_alloc: + vk_free(&dev->vk.alloc, dev); + return result; +} + +VKAPI_ATTR void VKAPI_CALL +kk_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(kk_device, dev, _device); + + if (!dev) + return; + + /* Meta first since it may destroy Vulkan objects */ + kk_device_finish_meta(dev); + + util_dynarray_fini(&dev->user_heap_cache.handles); + simple_mtx_destroy(&dev->user_heap_cache.mutex); + kk_device_finish_lib(dev); + kk_query_table_finish(dev, &dev->occlusion_queries); + kk_destroy_sampler_heap(dev, &dev->samplers); + + kk_queue_finish(dev, &dev->queue); + kk_destroy_bo(dev, dev->null_descriptor); + vk_device_finish(&dev->vk); + + if (dev->gpu_capture_enabled) { + mtl_stop_gpu_capture(); + } + + vk_free(&dev->vk.alloc, dev); +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_GetCalibratedTimestampsKHR( + VkDevice _device, uint32_t timestampCount, + const VkCalibratedTimestampInfoKHR *pTimestampInfos, uint64_t *pTimestamps, + uint64_t *pMaxDeviation) +{ + uint64_t max_clock_period = 0; + uint64_t begin, end; + int d; + +#ifdef CLOCK_MONOTONIC_RAW + begin = vk_clock_gettime(CLOCK_MONOTONIC_RAW); +#else + begin = vk_clock_gettime(CLOCK_MONOTONIC); +#endif + + for (d = 0; d < timestampCount; d++) { + switch (pTimestampInfos[d].timeDomain) { + case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR: + pTimestamps[d] = vk_clock_gettime(CLOCK_MONOTONIC); + max_clock_period = MAX2(max_clock_period, 1); + break; + +#ifdef CLOCK_MONOTONIC_RAW + case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR: + pTimestamps[d] = begin; + break; +#endif + default: + pTimestamps[d] = 0; + break; + } + } + +#ifdef CLOCK_MONOTONIC_RAW + end = vk_clock_gettime(CLOCK_MONOTONIC_RAW); +#else + end = vk_clock_gettime(CLOCK_MONOTONIC); +#endif + + *pMaxDeviation = vk_time_max_deviation(begin, end, max_clock_period); + + return VK_SUCCESS; +} + +/* We need to implement this ourselves so we give the fake ones for vk_common_* + * to work when executing actual commands */ +static PFN_vkVoidFunction +kk_device_get_proc_addr(const struct kk_device *device, const char *name) +{ + if (device == NULL || name == NULL) + return NULL; + + struct vk_instance *instance = device->vk.physical->instance; + return vk_device_dispatch_table_get_if_supported( + &device->exposed_dispatch_table, name, instance->app_info.api_version, + &instance->enabled_extensions, &device->vk.enabled_extensions); +} + +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +kk_GetDeviceProcAddr(VkDevice _device, const char *pName) +{ + VK_FROM_HANDLE(kk_device, device, _device); + return kk_device_get_proc_addr(device, pName); +} + +void +kk_device_add_user_heap(struct kk_device *dev, mtl_heap *heap) +{ + simple_mtx_lock(&dev->user_heap_cache.mutex); + util_dynarray_append(&dev->user_heap_cache.handles, mtl_heap *, heap); + dev->user_heap_cache.hash += 1u; + simple_mtx_unlock(&dev->user_heap_cache.mutex); +} + +void +kk_device_remove_user_heap(struct kk_device *dev, mtl_heap *heap) +{ + simple_mtx_lock(&dev->user_heap_cache.mutex); + util_dynarray_delete_unordered(&dev->user_heap_cache.handles, mtl_heap *, + 
heap); + simple_mtx_unlock(&dev->user_heap_cache.mutex); +} diff --git a/src/kosmickrisp/vulkan/kk_device.h b/src/kosmickrisp/vulkan/kk_device.h new file mode 100644 index 00000000000..50e05e934c9 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_device.h @@ -0,0 +1,137 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_DEVICE_H +#define KK_DEVICE_H 1 + +#include "kk_private.h" + +#include "kk_query_table.h" +#include "kk_queue.h" + +#include "kosmickrisp/bridge/mtl_types.h" + +#include "util/u_dynarray.h" + +#include "vk_device.h" +#include "vk_meta.h" +#include "vk_queue.h" + +struct kk_bo; +struct kk_physical_device; +struct vk_pipeline_cache; + +enum kk_device_lib_pipeline { + KK_LIB_IMM_WRITE = 0, + KK_LIB_COPY_QUERY, + KK_LIB_TRIANGLE_FAN, + KK_LIB_COUNT, +}; + +struct kk_user_heap_cache { + simple_mtx_t mutex; + uint32_t hash; + struct util_dynarray handles; +}; + +struct mtl_sampler_packed { + enum mtl_sampler_address_mode mode_u; + enum mtl_sampler_address_mode mode_v; + enum mtl_sampler_address_mode mode_w; + enum mtl_sampler_border_color border_color; + + enum mtl_sampler_min_mag_filter min_filter; + enum mtl_sampler_min_mag_filter mag_filter; + enum mtl_sampler_mip_filter mip_filter; + + enum mtl_compare_function compare_func; + float min_lod; + float max_lod; + uint32_t max_anisotropy; + bool normalized_coordinates; +}; + +struct kk_rc_sampler { + struct mtl_sampler_packed key; + + mtl_sampler *handle; + + /* Reference count for this hardware sampler, protected by the heap mutex */ + uint16_t refcount; + + /* Index of this hardware sampler in the hardware sampler heap */ + uint16_t index; +}; + +struct kk_sampler_heap { + simple_mtx_t lock; + + struct kk_query_table table; + + /* Map of agx_sampler_packed to hk_rc_sampler */ + struct hash_table *ht; +}; + +struct kk_device { + struct vk_device vk; + + mtl_device *mtl_handle; + + /* Dispatch table exposed to the user. Required since we need to record all + * commands due to Metal limitations */ + struct vk_device_dispatch_table exposed_dispatch_table; + + struct kk_bo *null_descriptor; + + struct kk_sampler_heap samplers; + struct kk_query_table occlusion_queries; + + /* Track all heaps the user allocated so we can set them all as resident when + * recording as required by Metal. 
*/ + struct kk_user_heap_cache user_heap_cache; + + mtl_compute_pipeline_state *lib_pipelines[KK_LIB_COUNT]; + + struct kk_queue queue; + + struct vk_meta_device meta; + + bool gpu_capture_enabled; +}; + +VK_DEFINE_HANDLE_CASTS(kk_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE) + +static inline mtl_compute_pipeline_state * +kk_device_lib_pipeline(const struct kk_device *dev, + enum kk_device_lib_pipeline pipeline) +{ + assert(pipeline < KK_LIB_COUNT); + return dev->lib_pipelines[pipeline]; +} + +static inline struct kk_physical_device * +kk_device_physical(const struct kk_device *dev) +{ + return (struct kk_physical_device *)dev->vk.physical; +} + +VkResult kk_device_init_meta(struct kk_device *dev); +void kk_device_finish_meta(struct kk_device *dev); +VkResult kk_device_init_lib(struct kk_device *dev); +void kk_device_finish_lib(struct kk_device *dev); +void kk_device_add_user_heap(struct kk_device *dev, mtl_heap *heap); +void kk_device_remove_user_heap(struct kk_device *dev, mtl_heap *heap); + +/* Required to create a sampler */ +mtl_sampler *kk_sampler_create(struct kk_device *dev, + const struct mtl_sampler_packed *packed); +VkResult kk_sampler_heap_add(struct kk_device *dev, + struct mtl_sampler_packed desc, + struct kk_rc_sampler **out); +void kk_sampler_heap_remove(struct kk_device *dev, struct kk_rc_sampler *rc); + +#endif // KK_DEVICE_H diff --git a/src/kosmickrisp/vulkan/kk_device_lib.c b/src/kosmickrisp/vulkan/kk_device_lib.c new file mode 100644 index 00000000000..9d2afbf9e45 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_device_lib.c @@ -0,0 +1,191 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_device.h" + +#include "kk_shader.h" + +#include "kkcl.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" + +#include "nir/nir.h" +#include "nir/nir_builder.h" + +static nir_def * +load_struct_var(nir_builder *b, nir_variable *var, uint32_t field) +{ + nir_deref_instr *deref = + nir_build_deref_struct(b, nir_build_deref_var(b, var), field); + return nir_load_deref(b, deref); +} + +static nir_shader * +create_imm_write_shader() +{ + nir_builder build = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, + "kk-meta-imm-write-u64"); + nir_builder *b = &build; + + struct glsl_struct_field push_fields[] = { + {.type = glsl_uint64_t_type(), .name = "buffer_address", .offset = 0}, + }; + const struct glsl_type *push_iface_type = glsl_interface_type( + push_fields, ARRAY_SIZE(push_fields), GLSL_INTERFACE_PACKING_STD140, + false /* row_major */, "push"); + nir_variable *push = nir_variable_create(b->shader, nir_var_mem_push_const, + push_iface_type, "push"); + + b->shader->info.workgroup_size[0] = 1; + b->shader->info.workgroup_size[1] = 1; + b->shader->info.workgroup_size[2] = 1; + + libkk_write_u64(b, load_struct_var(b, push, 0)); + + return build.shader; +} + +static nir_shader * +create_copy_query_shader() +{ + nir_builder build = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, + "kk-meta-copy-queries"); + nir_builder *b = &build; + + struct glsl_struct_field push_fields[] = { + {.type = glsl_uint64_t_type(), .name = "availability", .offset = 0}, + {.type = glsl_uint64_t_type(), .name = "results", .offset = 8}, + {.type = glsl_uint64_t_type(), .name = "indices", .offset = 16}, + {.type = glsl_uint64_t_type(), .name = "dst_addr", .offset = 24}, + {.type = glsl_uint64_t_type(), .name = "dst_stride", .offset = 32}, + {.type = glsl_uint_type(), .name = "first_query", .offset = 40}, + {.type = 
glsl_uint_type(), .name = "flags", .offset = 44}, + {.type = glsl_uint16_t_type(), .name = "reports_per_query", .offset = 48}, + }; + /* TODO_KOSMICKRISP Don't use push constants and directly bind the buffer to + * the binding index. This requires compiler work first to remove the + * hard-coded buffer0 value. Same applies to other creation functions. + */ + const struct glsl_type *push_iface_type = glsl_interface_type( + push_fields, ARRAY_SIZE(push_fields), GLSL_INTERFACE_PACKING_STD140, + false /* row_major */, "push"); + nir_variable *push = nir_variable_create(b->shader, nir_var_mem_push_const, + push_iface_type, "push"); + + b->shader->info.workgroup_size[0] = 1; + b->shader->info.workgroup_size[1] = 1; + b->shader->info.workgroup_size[2] = 1; + + libkk_copy_queries(b, load_struct_var(b, push, 0), + load_struct_var(b, push, 1), load_struct_var(b, push, 2), + load_struct_var(b, push, 3), load_struct_var(b, push, 4), + load_struct_var(b, push, 5), load_struct_var(b, push, 6), + load_struct_var(b, push, 7)); + + return build.shader; +} + +static nir_shader * +create_triangle_fan_shader() +{ + nir_builder build = nir_builder_init_simple_shader( + MESA_SHADER_COMPUTE, NULL, "kk-device-unroll-geomtry-and-restart"); + nir_builder *b = &build; + + struct glsl_struct_field push_fields[] = { + {.type = glsl_uint64_t_type(), .name = "index_buffer", .offset = 0}, + {.type = glsl_uint64_t_type(), .name = "out_ptr", .offset = 8}, + {.type = glsl_uint64_t_type(), .name = "indirect_in", .offset = 16}, + {.type = glsl_uint64_t_type(), .name = "indirect_out", .offset = 24}, + {.type = glsl_uint_type(), .name = "restart_index", .offset = 32}, + {.type = glsl_uint_type(), .name = "index_buffer_size_el", .offset = 36}, + {.type = glsl_uint_type(), .name = "in_el_size_B,", .offset = 40}, + {.type = glsl_uint_type(), .name = "out_el_size_B,", .offset = 44}, + {.type = glsl_uint_type(), .name = "flatshade_first", .offset = 48}, + {.type = glsl_uint_type(), .name = "mode", .offset = 52}, + }; + const struct glsl_type *push_iface_type = glsl_interface_type( + push_fields, ARRAY_SIZE(push_fields), GLSL_INTERFACE_PACKING_STD140, + false /* row_major */, "push"); + nir_variable *push = nir_variable_create(b->shader, nir_var_mem_push_const, + push_iface_type, "push"); + + b->shader->info.workgroup_size[0] = 1; + b->shader->info.workgroup_size[1] = 1; + b->shader->info.workgroup_size[2] = 1; + + libkk_unroll_geometry_and_restart( + b, load_struct_var(b, push, 0), load_struct_var(b, push, 1), + load_struct_var(b, push, 2), load_struct_var(b, push, 3), + load_struct_var(b, push, 4), load_struct_var(b, push, 5), + load_struct_var(b, push, 6), load_struct_var(b, push, 7), + load_struct_var(b, push, 8), load_struct_var(b, push, 9)); + + return build.shader; +} + +static struct { + enum kk_device_lib_pipeline ndx; + nir_shader *(*create_shader_fn)(); +} lib_shaders[KK_LIB_COUNT] = { + {KK_LIB_IMM_WRITE, create_imm_write_shader}, + {KK_LIB_COPY_QUERY, create_copy_query_shader}, + {KK_LIB_TRIANGLE_FAN, create_triangle_fan_shader}, +}; +static_assert(ARRAY_SIZE(lib_shaders) == KK_LIB_COUNT, + "Device lib shader count and created shader count mismatch"); + +VkResult +kk_device_init_lib(struct kk_device *dev) +{ + VkResult result = VK_SUCCESS; + uint32_t i = 0u; + for (; i < KK_LIB_COUNT; ++i) { + nir_shader *s = lib_shaders[i].create_shader_fn(); + if (!s) + goto fail; + + struct kk_shader *shader = NULL; + result = kk_compile_nir_shader(dev, s, &dev->vk.alloc, &shader); + if (result != VK_SUCCESS) + goto fail; + + 
mtl_library *library = mtl_new_library(dev->mtl_handle, shader->msl_code); + if (library == NULL) + goto fail; + + uint32_t local_size_threads = shader->info.cs.local_size.x * + shader->info.cs.local_size.y * + shader->info.cs.local_size.z; + mtl_function *function = + mtl_new_function_with_name(library, shader->entrypoint_name); + dev->lib_pipelines[i] = mtl_new_compute_pipeline_state( + dev->mtl_handle, function, local_size_threads); + mtl_release(function); + mtl_release(library); + + /* We no longer need the shader. Although it may be useful to keep it + * alive for the info maybe? */ + shader->vk.ops->destroy(&dev->vk, &shader->vk, &dev->vk.alloc); + + if (!dev->lib_pipelines[i]) + goto fail; + } + + return result; + +fail: + for (uint32_t j = 0u; j < i; ++j) + mtl_release(dev->lib_pipelines[j]); + return vk_error(dev, result); +} + +void +kk_device_finish_lib(struct kk_device *dev) +{ + for (uint32_t i = 0; i < KK_LIB_COUNT; ++i) + mtl_release(dev->lib_pipelines[i]); +} diff --git a/src/kosmickrisp/vulkan/kk_device_memory.c b/src/kosmickrisp/vulkan/kk_device_memory.c new file mode 100644 index 00000000000..e020aa979f4 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_device_memory.c @@ -0,0 +1,258 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_device_memory.h" + +#include "kk_device.h" +#include "kk_entrypoints.h" +#include "kk_physical_device.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" + +#include "vulkan/vulkan_metal.h" + +#include "util/u_atomic.h" +#include "util/u_memory.h" + +#include <inttypes.h> +#include <sys/mman.h> + +/* Supports mtlheap only */ +const VkExternalMemoryProperties kk_mtlheap_mem_props = { + .externalMemoryFeatures = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT, + .exportFromImportedHandleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLHEAP_BIT_EXT, + .compatibleHandleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLHEAP_BIT_EXT, +}; + +#ifdef VK_USE_PLATFORM_METAL_EXT +VKAPI_ATTR VkResult VKAPI_CALL +kk_GetMemoryMetalHandlePropertiesEXT( + VkDevice device, VkExternalMemoryHandleTypeFlagBits handleType, + const void *pHandle, + VkMemoryMetalHandlePropertiesEXT *pMemoryMetalHandleProperties) +{ + VK_FROM_HANDLE(kk_device, dev, device); + struct kk_physical_device *pdev = kk_device_physical(dev); + + /* We only support heaps since that's the backing for all our memory and + * simplifies implementation */ + switch (handleType) { + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLHEAP_BIT_EXT: + break; + default: + return vk_error(dev, VK_ERROR_INVALID_EXTERNAL_HANDLE); + } + pMemoryMetalHandleProperties->memoryTypeBits = + BITFIELD_MASK(pdev->mem_type_count); + + return VK_SUCCESS; +} +#endif /* VK_USE_PLATFORM_METAL_EXT */ + +VKAPI_ATTR VkResult VKAPI_CALL +kk_AllocateMemory(VkDevice device, const VkMemoryAllocateInfo *pAllocateInfo, + const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem) +{ + VK_FROM_HANDLE(kk_device, dev, device); + struct kk_physical_device *pdev = kk_device_physical(dev); + struct kk_device_memory *mem; + VkResult result = VK_SUCCESS; + const VkImportMemoryMetalHandleInfoEXT *metal_info = vk_find_struct_const( + pAllocateInfo->pNext, IMPORT_MEMORY_METAL_HANDLE_INFO_EXT); + const VkMemoryType *type = &pdev->mem_types[pAllocateInfo->memoryTypeIndex]; + + // TODO_KOSMICKRISP Do the actual memory allocation with alignment requirements + uint32_t alignment = (1ULL << 12); + + const 
uint64_t aligned_size = + align64(pAllocateInfo->allocationSize, alignment); + + mem = vk_device_memory_create(&dev->vk, pAllocateInfo, pAllocator, + sizeof(*mem)); + if (!mem) + return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + + if (metal_info && metal_info->handleType) { + /* We only support heaps since that's the backing for all our memory and + * simplifies implementation */ + assert(metal_info->handleType == + VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLHEAP_BIT_EXT); + mem->bo = CALLOC_STRUCT(kk_bo); + if (!mem->bo) { + result = vk_errorf(&dev->vk.base, VK_ERROR_OUT_OF_DEVICE_MEMORY, "%m"); + goto fail_alloc; + } + mem->bo->mtl_handle = mtl_retain(metal_info->handle); + mem->bo->map = + mtl_new_buffer_with_length(mem->bo->mtl_handle, mem->vk.size, 0u); + mem->bo->gpu = mtl_buffer_get_gpu_address(mem->bo->map); + mem->bo->cpu = mtl_get_contents(mem->bo->map); + mem->bo->size_B = mtl_heap_get_size(mem->bo->mtl_handle); + } else { + result = + kk_alloc_bo(dev, &dev->vk.base, aligned_size, alignment, &mem->bo); + if (result != VK_SUCCESS) + goto fail_alloc; + } + + struct kk_memory_heap *heap = &pdev->mem_heaps[type->heapIndex]; + p_atomic_add(&heap->used, mem->bo->size_B); + + kk_device_add_user_heap(dev, mem->bo->mtl_handle); + + *pMem = kk_device_memory_to_handle(mem); + + return VK_SUCCESS; + +fail_alloc: + vk_device_memory_destroy(&dev->vk, pAllocator, &mem->vk); + return result; +} + +VKAPI_ATTR void VKAPI_CALL +kk_FreeMemory(VkDevice device, VkDeviceMemory _mem, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VK_FROM_HANDLE(kk_device_memory, mem, _mem); + struct kk_physical_device *pdev = kk_device_physical(dev); + + if (!mem) + return; + + kk_device_remove_user_heap(dev, mem->bo->mtl_handle); + + const VkMemoryType *type = &pdev->mem_types[mem->vk.memory_type_index]; + struct kk_memory_heap *heap = &pdev->mem_heaps[type->heapIndex]; + p_atomic_add(&heap->used, -((int64_t)mem->bo->size_B)); + + kk_destroy_bo(dev, mem->bo); + + vk_device_memory_destroy(&dev->vk, pAllocator, &mem->vk); +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_MapMemory2KHR(VkDevice device, const VkMemoryMapInfoKHR *pMemoryMapInfo, + void **ppData) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VK_FROM_HANDLE(kk_device_memory, mem, pMemoryMapInfo->memory); + VkResult result = VK_SUCCESS; + + if (mem == NULL) { + *ppData = NULL; + return VK_SUCCESS; + } + + const VkDeviceSize offset = pMemoryMapInfo->offset; + const VkDeviceSize size = vk_device_memory_range( + &mem->vk, pMemoryMapInfo->offset, pMemoryMapInfo->size); + + /* From the Vulkan spec version 1.0.32 docs for MapMemory: + * + * * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0 + * assert(size != 0); + * * If size is not equal to VK_WHOLE_SIZE, size must be less than or + * equal to the size of the memory minus offset + */ + assert(size > 0); + assert(offset + size <= mem->bo->size_B); + + if (size != (size_t)size) { + return vk_errorf(dev, VK_ERROR_MEMORY_MAP_FAILED, + "requested size 0x%" PRIx64 " does not fit in %u bits", + size, (unsigned)(sizeof(size_t) * 8)); + } + + /* From the Vulkan 1.2.194 spec: + * + * "memory must not be currently host mapped" + */ + if (mem->map != NULL) { + return vk_errorf(dev, VK_ERROR_MEMORY_MAP_FAILED, + "Memory object already mapped."); + } + + // TODO_KOSMICKRISP Use mmap here to so we can support VK_EXT_map_memory_placed + mem->map = mem->bo->cpu; + + *ppData = mem->map + offset; + + return result; +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_UnmapMemory2KHR(VkDevice 
device, + const VkMemoryUnmapInfoKHR *pMemoryUnmapInfo) +{ + VK_FROM_HANDLE(kk_device_memory, mem, pMemoryUnmapInfo->memory); + + if (mem == NULL) + return VK_SUCCESS; + + // TODO_KOSMICKRISP Use unmap here to so we can support + // VK_EXT_map_memory_placed + mem->map = NULL; + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_FlushMappedMemoryRanges(VkDevice device, uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges) +{ + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_InvalidateMappedMemoryRanges(VkDevice device, uint32_t memoryRangeCount, + const VkMappedMemoryRange *pMemoryRanges) +{ + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +kk_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory _mem, + VkDeviceSize *pCommittedMemoryInBytes) +{ + VK_FROM_HANDLE(kk_device_memory, mem, _mem); + + *pCommittedMemoryInBytes = mem->bo->size_B; +} + +#ifdef VK_USE_PLATFORM_METAL_EXT +VKAPI_ATTR VkResult VKAPI_CALL +kk_GetMemoryMetalHandleEXT( + VkDevice device, const VkMemoryGetMetalHandleInfoEXT *pGetMetalHandleInfo, + void **pHandle) +{ + /* We only support heaps since that's the backing for all our memory and + * simplifies implementation */ + assert(pGetMetalHandleInfo->handleType == + VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLHEAP_BIT_EXT); + VK_FROM_HANDLE(kk_device_memory, mem, pGetMetalHandleInfo->memory); + + /* From the Vulkan spec of vkGetMemoryMetalHandleEXT: + * + * "Unless the app retains the handle object returned by the call, + * the lifespan will be the same as the associated VkDeviceMemory" + */ + *pHandle = mem->bo->mtl_handle; + return VK_SUCCESS; +} +#endif /* VK_USE_PLATFORM_METAL_EXT */ + +VKAPI_ATTR uint64_t VKAPI_CALL +kk_GetDeviceMemoryOpaqueCaptureAddress( + UNUSED VkDevice device, const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo) +{ + VK_FROM_HANDLE(kk_device_memory, mem, pInfo->memory); + + return mem->bo->gpu; +} diff --git a/src/kosmickrisp/vulkan/kk_device_memory.h b/src/kosmickrisp/vulkan/kk_device_memory.h new file mode 100644 index 00000000000..44ced28aa67 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_device_memory.h @@ -0,0 +1,30 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_MEMORY_H +#define KK_MEMORY_H 1 + +#include "kk_private.h" + +#include "kk_bo.h" + +#include "vk_device_memory.h" + +#include "util/list.h" + +struct kk_device_memory { + struct vk_device_memory vk; + struct kk_bo *bo; + void *map; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(kk_device_memory, vk.base, VkDeviceMemory, + VK_OBJECT_TYPE_DEVICE_MEMORY) + +extern const VkExternalMemoryProperties kk_mtlheap_mem_props; + +#endif // KK_MEMORY_H diff --git a/src/kosmickrisp/vulkan/kk_encoder.c b/src/kosmickrisp/vulkan/kk_encoder.c new file mode 100644 index 00000000000..ca083674939 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_encoder.c @@ -0,0 +1,480 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_encoder.h" + +#include "kk_bo.h" +#include "kk_cmd_buffer.h" +#include "kk_queue.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" +#include "kosmickrisp/bridge/vk_to_mtl_map.h" + +#include "cl/kk_query.h" + +static void +kk_encoder_start_internal(struct kk_encoder_internal *encoder, + mtl_device *device, mtl_command_queue *queue) +{ + encoder->cmd_buffer = mtl_new_command_buffer(queue); + encoder->last_used = KK_ENC_NONE; + util_dynarray_init(&encoder->fences, NULL); +} + +VkResult +kk_encoder_init(mtl_device *device, struct kk_queue *queue, + struct kk_encoder **encoder) +{ + assert(encoder && device && queue); + struct kk_encoder *enc = (struct kk_encoder *)malloc(sizeof(*enc)); + if (!enc) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + memset(enc, 0u, sizeof(*enc)); + enc->dev = device; + kk_encoder_start_internal(&enc->main, device, queue->main.mtl_handle); + kk_encoder_start_internal(&enc->pre_gfx, device, queue->pre_gfx.mtl_handle); + enc->event = mtl_new_event(device); + util_dynarray_init(&enc->imm_writes, NULL); + util_dynarray_init(&enc->resident_buffers, NULL); + util_dynarray_init(&enc->copy_query_pool_result_infos, NULL); + + *encoder = enc; + return VK_SUCCESS; +} + +mtl_render_encoder * +kk_encoder_start_render(struct kk_cmd_buffer *cmd, + mtl_render_pass_descriptor *descriptor, + uint32_t view_mask) +{ + struct kk_encoder *encoder = cmd->encoder; + /* We must not already be in a render encoder */ + assert(encoder->main.last_used != KK_ENC_RENDER || + encoder->main.encoder == NULL); + if (encoder->main.last_used != KK_ENC_RENDER) { + kk_encoder_signal_fence_and_end(cmd); + + /* Before we start any render operation we need to ensure we have the + * requried signals to insert pre_gfx execution before the render encoder + * in case we need to insert commands to massage input data for things + * like triangle fans. For this, we signal the value pre_gfx will wait on, + * and we wait on the value pre_gfx will signal once completed. + */ + encoder->signal_value_pre_gfx = encoder->event_value; + mtl_encode_signal_event(encoder->main.cmd_buffer, encoder->event, + ++encoder->event_value); + encoder->wait_value_pre_gfx = encoder->event_value; + mtl_encode_wait_for_event(encoder->main.cmd_buffer, encoder->event, + ++encoder->event_value); + + encoder->main.encoder = mtl_new_render_command_encoder_with_descriptor( + encoder->main.cmd_buffer, descriptor); + if (encoder->main.wait_fence) { + mtl_render_wait_for_fence( + encoder->main.encoder, + util_dynarray_top(&encoder->main.fences, mtl_fence *)); + encoder->main.wait_fence = false; + } + + uint32_t layer_ids[KK_MAX_MULTIVIEW_VIEW_COUNT] = {}; + uint32_t count = 0u; + u_foreach_bit(id, view_mask) + layer_ids[count++] = id; + if (view_mask == 0u) { + layer_ids[count++] = 0; + } + mtl_set_vertex_amplification_count(encoder->main.encoder, layer_ids, + count); + encoder->main.user_heap_hash = UINT32_MAX; + + /* Bind read only data aka samplers' argument buffer. 
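+       * Metal encoder state does not carry over between encoders, so it is
+       * re-bound whenever a render or compute encoder is (re)started.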
*/ + struct kk_device *dev = kk_cmd_buffer_device(cmd); + mtl_set_vertex_buffer(encoder->main.encoder, dev->samplers.table.bo->map, + 0u, 1u); + mtl_set_fragment_buffer(encoder->main.encoder, + dev->samplers.table.bo->map, 0u, 1u); + } + encoder->main.last_used = KK_ENC_RENDER; + return encoder->main.encoder; +} + +mtl_compute_encoder * +kk_encoder_start_compute(struct kk_cmd_buffer *cmd) +{ + struct kk_encoder *encoder = cmd->encoder; + /* We must not already be in a render encoder */ + assert(encoder->main.last_used != KK_ENC_RENDER || + encoder->main.encoder == NULL); + struct kk_encoder_internal *enc = &encoder->main; + if (encoder->main.last_used != KK_ENC_COMPUTE) { + kk_encoder_signal_fence_and_end(cmd); + enc->encoder = mtl_new_compute_command_encoder(enc->cmd_buffer); + if (enc->wait_fence) { + mtl_compute_wait_for_fence( + enc->encoder, util_dynarray_top(&enc->fences, mtl_fence *)); + enc->wait_fence = false; + } + enc->user_heap_hash = UINT32_MAX; + + /* Bind read only data aka samplers' argument buffer. */ + struct kk_device *dev = kk_cmd_buffer_device(cmd); + mtl_compute_set_buffer(enc->encoder, dev->samplers.table.bo->map, 0u, 1u); + } + encoder->main.last_used = KK_ENC_COMPUTE; + return encoder->main.encoder; +} + +mtl_compute_encoder * +kk_encoder_start_blit(struct kk_cmd_buffer *cmd) +{ + struct kk_encoder *encoder = cmd->encoder; + /* We must not already be in a render encoder */ + assert(encoder->main.last_used != KK_ENC_RENDER || + encoder->main.encoder == NULL); + struct kk_encoder_internal *enc = &encoder->main; + if (encoder->main.last_used != KK_ENC_BLIT) { + kk_encoder_signal_fence_and_end(cmd); + enc->encoder = mtl_new_blit_command_encoder(enc->cmd_buffer); + if (enc->wait_fence) { + mtl_compute_wait_for_fence( + enc->encoder, util_dynarray_top(&enc->fences, mtl_fence *)); + enc->wait_fence = false; + } + } + encoder->main.last_used = KK_ENC_BLIT; + return encoder->main.encoder; +} + +void +kk_encoder_end(struct kk_cmd_buffer *cmd) +{ + assert(cmd); + + kk_encoder_signal_fence_and_end(cmd); + + /* Let remaining render encoders run without waiting since we are done */ + mtl_encode_signal_event(cmd->encoder->pre_gfx.cmd_buffer, + cmd->encoder->event, cmd->encoder->event_value); +} + +struct kk_imm_write_push { + uint64_t buffer_address; + uint32_t count; +}; + +void +upload_queue_writes(struct kk_cmd_buffer *cmd) +{ + struct kk_encoder *enc = cmd->encoder; + + struct kk_device *dev = kk_cmd_buffer_device(cmd); + uint32_t count = util_dynarray_num_elements(&enc->imm_writes, uint64_t) / 2u; + if (count != 0) { + mtl_compute_encoder *compute = kk_compute_encoder(cmd); + struct kk_bo *bo = kk_cmd_allocate_buffer(cmd, enc->imm_writes.size, 8u); + /* kk_cmd_allocate_buffer sets the cmd buffer error so we can just exit */ + if (!bo) + return; + memcpy(bo->cpu, enc->imm_writes.data, enc->imm_writes.size); + uint32_t buffer_count = + util_dynarray_num_elements(&enc->resident_buffers, mtl_buffer *); + mtl_compute_use_resource(compute, bo->map, MTL_RESOURCE_USAGE_READ); + mtl_compute_use_resources( + compute, enc->resident_buffers.data, buffer_count, + MTL_RESOURCE_USAGE_READ | MTL_RESOURCE_USAGE_WRITE); + struct kk_imm_write_push push_data = { + .buffer_address = bo->gpu, + .count = count, + }; + kk_cmd_dispatch_pipeline(cmd, compute, + kk_device_lib_pipeline(dev, KK_LIB_IMM_WRITE), + &push_data, sizeof(push_data), count, 1, 1); + enc->resident_buffers.size = 0u; + enc->imm_writes.size = 0u; + } + + count = util_dynarray_num_elements(&enc->copy_query_pool_result_infos, + 
struct kk_copy_query_pool_results_info); + if (count != 0u) { + mtl_compute_encoder *compute = kk_compute_encoder(cmd); + uint32_t buffer_count = + util_dynarray_num_elements(&enc->resident_buffers, mtl_buffer *); + mtl_compute_use_resources( + compute, enc->resident_buffers.data, buffer_count, + MTL_RESOURCE_USAGE_READ | MTL_RESOURCE_USAGE_WRITE); + + for (uint32_t i = 0u; i < count; ++i) { + struct kk_copy_query_pool_results_info *push_data = + util_dynarray_element(&enc->copy_query_pool_result_infos, + struct kk_copy_query_pool_results_info, i); + + kk_cmd_dispatch_pipeline( + cmd, compute, kk_device_lib_pipeline(dev, KK_LIB_COPY_QUERY), + push_data, sizeof(*push_data), push_data->query_count, 1, 1); + } + enc->resident_buffers.size = 0u; + enc->copy_query_pool_result_infos.size = 0u; + } + + /* All immediate write done, reset encoder */ + kk_encoder_signal_fence_and_end(cmd); +} + +void +kk_encoder_signal_fence_and_end(struct kk_cmd_buffer *cmd) +{ + struct kk_encoder *encoder = cmd->encoder; + /* End pre_gfx */ + if (encoder->pre_gfx.encoder) { + mtl_end_encoding(encoder->pre_gfx.encoder); + mtl_release(encoder->pre_gfx.encoder); + encoder->pre_gfx.encoder = NULL; + + /* We can start rendering once all pre-graphics work is done */ + mtl_encode_signal_event(encoder->pre_gfx.cmd_buffer, encoder->event, + encoder->event_value); + } + + assert(encoder); + enum kk_encoder_type type = encoder->main.last_used; + struct kk_encoder_internal *enc = kk_encoder_get_internal(encoder, type); + if (!enc || !enc->encoder) + return; + + mtl_fence *fence = mtl_new_fence(encoder->dev); + switch (type) { + case KK_ENC_RENDER: + mtl_render_update_fence(enc->encoder, fence); + break; + case KK_ENC_COMPUTE: + mtl_compute_update_fence(enc->encoder, fence); + break; + case KK_ENC_BLIT: + mtl_blit_update_fence(enc->encoder, fence); + break; + default: + assert(0); + break; + } + + mtl_end_encoding(enc->encoder); + mtl_release(enc->encoder); + enc->encoder = NULL; + enc->last_used = KK_ENC_NONE; + enc->wait_fence = true; + util_dynarray_append(&enc->fences, mtl_fence *, fence); + + if (cmd->drawable) { + mtl_present_drawable(enc->cmd_buffer, cmd->drawable); + cmd->drawable = NULL; + } + upload_queue_writes(cmd); +} + +static void +kk_post_execution_release_internal(struct kk_encoder_internal *encoder) +{ + mtl_release(encoder->cmd_buffer); + util_dynarray_foreach(&encoder->fences, mtl_fence *, fence) + mtl_release(*fence); + util_dynarray_fini(&encoder->fences); +} + +static void +kk_post_execution_release(void *data) +{ + struct kk_encoder *encoder = data; + kk_post_execution_release_internal(&encoder->main); + kk_post_execution_release_internal(&encoder->pre_gfx); + mtl_release(encoder->event); + util_dynarray_fini(&encoder->imm_writes); + util_dynarray_fini(&encoder->resident_buffers); + util_dynarray_fini(&encoder->copy_query_pool_result_infos); + free(encoder); +} + +void +kk_encoder_submit(struct kk_encoder *encoder) +{ + assert(encoder); + + mtl_add_completed_handler(encoder->main.cmd_buffer, + kk_post_execution_release, encoder); + + mtl_command_buffer_commit(encoder->pre_gfx.cmd_buffer); + mtl_command_buffer_commit(encoder->main.cmd_buffer); +} + +mtl_render_encoder * +kk_render_encoder(struct kk_cmd_buffer *cmd) +{ + struct kk_encoder *encoder = cmd->encoder; + /* Render encoders are created at vkBeginRendering only */ + assert(encoder->main.last_used == KK_ENC_RENDER && encoder->main.encoder); + return (mtl_render_encoder *)encoder->main.encoder; +} + +mtl_compute_encoder * +kk_compute_encoder(struct 
kk_cmd_buffer *cmd) +{ + struct kk_encoder *encoder = cmd->encoder; + return encoder->main.last_used == KK_ENC_COMPUTE + ? (mtl_blit_encoder *)encoder->main.encoder + : kk_encoder_start_compute(cmd); +} + +mtl_blit_encoder * +kk_blit_encoder(struct kk_cmd_buffer *cmd) +{ + struct kk_encoder *encoder = cmd->encoder; + return encoder->main.last_used == KK_ENC_BLIT + ? (mtl_blit_encoder *)encoder->main.encoder + : kk_encoder_start_blit(cmd); +} + +struct kk_encoder_internal * +kk_encoder_get_internal(struct kk_encoder *encoder, enum kk_encoder_type type) +{ + switch (type) { + case KK_ENC_NONE: + assert(encoder->main.last_used == KK_ENC_NONE); + return NULL; + case KK_ENC_RENDER: + assert(encoder->main.last_used == KK_ENC_RENDER); + return &encoder->main; + case KK_ENC_COMPUTE: + assert(encoder->main.last_used == KK_ENC_COMPUTE); + return &encoder->main; + case KK_ENC_BLIT: + assert(encoder->main.last_used == KK_ENC_BLIT); + return &encoder->main; + default: + assert(0); + return NULL; + } +} + +static mtl_compute_encoder * +kk_encoder_pre_gfx_encoder(struct kk_encoder *encoder) +{ + if (!encoder->pre_gfx.encoder) { + /* Fast-forward all previous render encoders and wait for the last one */ + mtl_encode_signal_event(encoder->pre_gfx.cmd_buffer, encoder->event, + encoder->signal_value_pre_gfx); + mtl_encode_wait_for_event(encoder->pre_gfx.cmd_buffer, encoder->event, + encoder->wait_value_pre_gfx); + encoder->pre_gfx.encoder = + mtl_new_compute_command_encoder(encoder->pre_gfx.cmd_buffer); + } + + return encoder->pre_gfx.encoder; +} + +struct kk_triangle_fan_info { + uint64_t index_buffer; + uint64_t out_ptr; + uint64_t in_draw; + uint64_t out_draw; + uint32_t restart_index; + uint32_t index_buffer_size_el; + uint32_t in_el_size_B; + uint32_t out_el_size_B; + uint32_t flatshade_first; + uint32_t mode; +}; + +static void +kk_encoder_render_triangle_fan_common(struct kk_cmd_buffer *cmd, + struct kk_triangle_fan_info *info, + mtl_buffer *indirect, mtl_buffer *index, + uint32_t index_count, + uint32_t in_el_size_B, + uint32_t out_el_size_B) +{ + uint32_t index_buffer_size_B = index_count * out_el_size_B; + uint32_t buffer_size_B = + sizeof(VkDrawIndexedIndirectCommand) + index_buffer_size_B; + struct kk_bo *index_buffer = + kk_cmd_allocate_buffer(cmd, buffer_size_B, out_el_size_B); + + if (!index_buffer) + return; + + info->out_ptr = index_buffer->gpu + sizeof(VkDrawIndexedIndirectCommand); + info->out_draw = index_buffer->gpu; + info->in_el_size_B = in_el_size_B; + info->out_el_size_B = out_el_size_B; + info->flatshade_first = true; + mtl_compute_encoder *encoder = kk_encoder_pre_gfx_encoder(cmd->encoder); + if (index) + mtl_compute_use_resource(encoder, index, MTL_RESOURCE_USAGE_READ); + mtl_compute_use_resource(encoder, indirect, MTL_RESOURCE_USAGE_READ); + mtl_compute_use_resource(encoder, index_buffer->map, + MTL_RESOURCE_USAGE_WRITE); + + struct kk_device *dev = kk_cmd_buffer_device(cmd); + kk_cmd_dispatch_pipeline(cmd, encoder, + kk_device_lib_pipeline(dev, KK_LIB_TRIANGLE_FAN), + info, sizeof(*info), 1u, 1u, 1u); + + enum mtl_index_type index_type = + index_size_in_bytes_to_mtl_index_type(out_el_size_B); + mtl_render_encoder *enc = kk_render_encoder(cmd); + mtl_draw_indexed_primitives_indirect( + enc, cmd->state.gfx.primitive_type, index_type, index_buffer->map, + sizeof(VkDrawIndexedIndirectCommand), index_buffer->map, 0u); +} + +void +kk_encoder_render_triangle_fan_indirect(struct kk_cmd_buffer *cmd, + mtl_buffer *indirect, uint64_t offset) +{ + enum mesa_prim mode = cmd->state.gfx.prim; 
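+   /* Worst-case size of the unrolled index buffer: e.g. a triangle fan of N
+    * vertices decomposes into N - 2 triangles, i.e. 3 * (N - 2) indices. */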
+ uint32_t decomposed_index_count = + u_decomposed_prims_for_vertices(mode, cmd->state.gfx.vb.max_vertices) * + mesa_vertices_per_prim(mode); + uint32_t el_size_B = decomposed_index_count < UINT16_MAX ? 2u : 4u; + struct kk_triangle_fan_info info = { + .in_draw = mtl_buffer_get_gpu_address(indirect) + offset, + .restart_index = UINT32_MAX, /* No restart */ + .mode = mode, + }; + kk_encoder_render_triangle_fan_common( + cmd, &info, indirect, NULL, decomposed_index_count, el_size_B, el_size_B); +} + +void +kk_encoder_render_triangle_fan_indexed_indirect(struct kk_cmd_buffer *cmd, + mtl_buffer *indirect, + uint64_t offset, + bool increase_el_size) +{ + uint32_t el_size_B = cmd->state.gfx.index.bytes_per_index; + + enum mesa_prim mode = cmd->state.gfx.prim; + uint32_t max_index_count = + (mtl_buffer_get_length(cmd->state.gfx.index.handle) - + cmd->state.gfx.index.offset) / + el_size_B; + uint32_t decomposed_index_count = + u_decomposed_prims_for_vertices(mode, max_index_count) * + mesa_vertices_per_prim(mode); + + struct kk_triangle_fan_info info = { + .index_buffer = mtl_buffer_get_gpu_address(cmd->state.gfx.index.handle) + + cmd->state.gfx.index.offset, + .in_draw = mtl_buffer_get_gpu_address(indirect) + offset, + .restart_index = + increase_el_size ? UINT32_MAX : cmd->state.gfx.index.restart, + .index_buffer_size_el = max_index_count, + .mode = mode, + }; + uint32_t out_el_size_B = increase_el_size ? sizeof(uint32_t) : el_size_B; + kk_encoder_render_triangle_fan_common( + cmd, &info, indirect, cmd->state.gfx.index.handle, decomposed_index_count, + el_size_B, out_el_size_B); +} diff --git a/src/kosmickrisp/vulkan/kk_encoder.h b/src/kosmickrisp/vulkan/kk_encoder.h new file mode 100644 index 00000000000..5c45b87ecdd --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_encoder.h @@ -0,0 +1,125 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_ENCODER_H
+#define KK_ENCODER_H 1
+
+#include "kosmickrisp/bridge/mtl_types.h"
+
+#include "util/u_dynarray.h"
+
+#include "vulkan/vulkan.h"
+
+struct kk_queue;
+struct kk_cmd_buffer;
+
+enum kk_encoder_type {
+   KK_ENC_NONE = 0,
+   KK_ENC_RENDER = BITFIELD_BIT(0),
+   KK_ENC_COMPUTE = BITFIELD_BIT(1),
+   KK_ENC_BLIT = BITFIELD_BIT(2),
+   KK_ENC_ALL = (KK_ENC_RENDER | KK_ENC_COMPUTE | KK_ENC_BLIT),
+   KK_ENC_COUNT = 3u,
+};
+
+struct kk_encoder_internal {
+   mtl_command_buffer *cmd_buffer;
+   mtl_command_encoder *encoder;
+
+   /* Used to know if we need to make heaps resident again */
+   uint32_t user_heap_hash;
+
+   /* Need to track last used so we can converge at submission */
+   enum kk_encoder_type last_used;
+
+   /* Used to synchronize between passes inside the same command buffer */
+   struct util_dynarray fences;
+   /* Tracks if we need to wait on the last fence present in fences at the start
+    * of the pass */
+   bool wait_fence;
+};
+
+struct kk_copy_query_pool_results_info {
+   uint64_t availability;
+   uint64_t results;
+   uint64_t indices;
+   uint64_t dst_addr;
+   uint64_t dst_stride;
+   uint32_t first_query;
+   VkQueryResultFlagBits flags;
+   uint16_t reports_per_query;
+   uint32_t query_count;
+};
+
+struct kk_encoder {
+   mtl_device *dev;
+   struct kk_encoder_internal main;
+   /* Compute only for pre gfx required work */
+   struct kk_encoder_internal pre_gfx;
+
+   /* Used to synchronize between main and pre_gfx encoders */
+   mtl_event *event;
+   uint64_t event_value;
+   /* Track what values pre_gfx must wait/signal before starting the encoding */
+   uint64_t wait_value_pre_gfx;
+   uint64_t signal_value_pre_gfx;
+
+   /* uint64_t pairs with first being the address, second being the value to
+    * write */
+   struct util_dynarray imm_writes;
+   /* mtl_buffers (destination buffers) so we can make them resident before the
+    * dispatch */
+   struct util_dynarray resident_buffers;
+   /* Array of kk_copy_query_pool_results_info structs */
+   struct util_dynarray copy_query_pool_result_infos;
+};
+
+/* Allocates encoder and initialises/creates all resources required to start
+ * recording commands into the multiple encoders */
+VkResult kk_encoder_init(mtl_device *device, struct kk_queue *queue,
+                         struct kk_encoder **encoder);
+
+/* Submits all command buffers and releases encoder memory.
Requires all command + * buffers in the encoder to be linked to the last one used so the post + * execution callback is called once all are done */ +void kk_encoder_submit(struct kk_encoder *encoder); + +mtl_render_encoder * +kk_encoder_start_render(struct kk_cmd_buffer *cmd, + mtl_render_pass_descriptor *descriptor, + uint32_t view_mask); + +mtl_compute_encoder *kk_encoder_start_compute(struct kk_cmd_buffer *cmd); + +mtl_compute_encoder *kk_encoder_start_blit(struct kk_cmd_buffer *cmd); + +/* Ends encoding on all command buffers */ +void kk_encoder_end(struct kk_cmd_buffer *cmd); + +/* Creates a fence and signals it inside the encoder, then ends encoding */ +void kk_encoder_signal_fence_and_end(struct kk_cmd_buffer *cmd); + +mtl_render_encoder *kk_render_encoder(struct kk_cmd_buffer *cmd); + +mtl_compute_encoder *kk_compute_encoder(struct kk_cmd_buffer *cmd); + +mtl_blit_encoder *kk_blit_encoder(struct kk_cmd_buffer *cmd); + +struct kk_encoder_internal *kk_encoder_get_internal(struct kk_encoder *encoder, + enum kk_encoder_type type); + +void upload_queue_writes(struct kk_cmd_buffer *cmd); + +void kk_encoder_render_triangle_fan_indirect(struct kk_cmd_buffer *cmd, + mtl_buffer *indirect, + uint64_t offset); + +void kk_encoder_render_triangle_fan_indexed_indirect(struct kk_cmd_buffer *cmd, + mtl_buffer *indirect, + uint64_t offset, + bool increase_el_size); + +#endif /* KK_ENCODER_H */ diff --git a/src/kosmickrisp/vulkan/kk_event.c b/src/kosmickrisp/vulkan/kk_event.c new file mode 100644 index 00000000000..3a16d19e49e --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_event.c @@ -0,0 +1,143 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_event.h" + +#include "kk_bo.h" +#include "kk_cmd_buffer.h" +#include "kk_device.h" +#include "kk_encoder.h" +#include "kk_entrypoints.h" + +#define KK_EVENT_MEM_SIZE sizeof(uint64_t) + +VKAPI_ATTR VkResult VKAPI_CALL +kk_CreateEvent(VkDevice device, const VkEventCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkEvent *pEvent) +{ + VK_FROM_HANDLE(kk_device, dev, device); + struct kk_event *event; + VkResult result = VK_SUCCESS; + + event = vk_object_zalloc(&dev->vk, pAllocator, sizeof(*event), + VK_OBJECT_TYPE_EVENT); + if (!event) + return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + + /* TODO_KOSMICKRISP Bring back the heap. 
*/ + result = kk_alloc_bo(dev, &dev->vk.base, KK_EVENT_MEM_SIZE, + KK_EVENT_MEM_SIZE, &event->bo); + if (result != VK_SUCCESS) { + vk_object_free(&dev->vk, pAllocator, event); + return result; + } + + event->status = event->bo->cpu; + event->addr = event->bo->gpu; + *event->status = VK_EVENT_RESET; + + *pEvent = kk_event_to_handle(event); + + return result; +} + +VKAPI_ATTR void VKAPI_CALL +kk_DestroyEvent(VkDevice device, VkEvent _event, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VK_FROM_HANDLE(kk_event, event, _event); + + if (!event) + return; + + kk_destroy_bo(dev, event->bo); + + vk_object_free(&dev->vk, pAllocator, event); +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_GetEventStatus(VkDevice device, VkEvent _event) +{ + VK_FROM_HANDLE(kk_event, event, _event); + + return *event->status; +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_SetEvent(VkDevice device, VkEvent _event) +{ + VK_FROM_HANDLE(kk_event, event, _event); + + *event->status = VK_EVENT_SET; + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_ResetEvent(VkDevice device, VkEvent _event) +{ + VK_FROM_HANDLE(kk_event, event, _event); + + *event->status = VK_EVENT_RESET; + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdSetEvent2(VkCommandBuffer commandBuffer, VkEvent _event, + const VkDependencyInfo *pDependencyInfo) +{ + VK_FROM_HANDLE(kk_event, event, _event); + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + enum kk_encoder_type last_used = cmd->encoder->main.last_used; + kk_cmd_write(cmd, event->bo->map, event->addr, VK_EVENT_SET); + if (last_used != KK_ENC_NONE) + kk_encoder_signal_fence_and_end(cmd); + else + upload_queue_writes(cmd); + + /* If we were inside a render pass, restart it loading attachments */ + if (last_used == KK_ENC_RENDER) { + struct kk_graphics_state *state = &cmd->state.gfx; + assert(state->render_pass_descriptor); + kk_encoder_start_render(cmd, state->render_pass_descriptor, + state->render.view_mask); + kk_cmd_buffer_dirty_all_gfx(cmd); + } +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdResetEvent2(VkCommandBuffer commandBuffer, VkEvent _event, + VkPipelineStageFlags2 stageMask) +{ + VK_FROM_HANDLE(kk_event, event, _event); + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + enum kk_encoder_type last_used = cmd->encoder->main.last_used; + kk_cmd_write(cmd, event->bo->map, event->addr, VK_EVENT_RESET); + if (last_used != KK_ENC_NONE) + kk_encoder_signal_fence_and_end(cmd); + else + upload_queue_writes(cmd); + + /* If we were inside a render pass, restart it loading attachments */ + if (last_used == KK_ENC_RENDER) { + struct kk_graphics_state *state = &cmd->state.gfx; + assert(state->render_pass_descriptor); + kk_encoder_start_render(cmd, state->render_pass_descriptor, + state->render.view_mask); + kk_cmd_buffer_dirty_all_gfx(cmd); + } +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, + const VkEvent *pEvents, + const VkDependencyInfo *pDependencyInfos) +{ + /* We do nothing, event should already be set by the time we are here. */ +} diff --git a/src/kosmickrisp/vulkan/kk_event.h b/src/kosmickrisp/vulkan/kk_event.h new file mode 100644 index 00000000000..4d6b7a14dcc --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_event.h @@ -0,0 +1,27 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_EVENT_H +#define KK_EVENT_H 1 + +#include "kk_private.h" + +#include "vk_object.h" + +struct kk_bo; + +struct kk_event { + struct vk_object_base base; + struct kk_bo *bo; + + uint64_t addr; + uint64_t *status; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(kk_event, base, VkEvent, VK_OBJECT_TYPE_EVENT) + +#endif /* KK_EVENT_H */ diff --git a/src/kosmickrisp/vulkan/kk_format.c b/src/kosmickrisp/vulkan/kk_format.c new file mode 100644 index 00000000000..1a0347d5ac6 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_format.c @@ -0,0 +1,359 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_format.h" + +#include "kk_buffer_view.h" +#include "kk_entrypoints.h" +#include "kk_image.h" +#include "kk_physical_device.h" + +#include "kosmickrisp/bridge/mtl_format.h" + +#include "vk_enum_defines.h" +#include "vk_format.h" + +#define MTL_FMT_ALL_NO_ATOMIC(width) \ + .bit_widths = width, .filter = 1u, .write = 1u, .color = 1u, .blend = 1u, \ + .msaa = 1u, .resolve = 1u, .sparse = 1u, .atomic = 0u + +// Filter, Write, Color, Blend, MSAA, Sparse +#define MTL_FMT_FWCBMS(width) \ + .bit_widths = width, .filter = 1u, .write = 1u, .color = 1u, .blend = 1u, \ + .msaa = 1u, .resolve = 0u, .sparse = 1u, .atomic = 0u + +// Filter, Color, Blend, MSAA, Resolve, Sparse +#define MTL_FMT_FCBMRS(width) \ + .bit_widths = width, .filter = 1u, .write = 0u, .color = 1u, .blend = 1u, \ + .msaa = 1u, .resolve = 1u, .sparse = 1u, .atomic = 0u + +// Filter, Write, Color, Blend, MSAA +#define MTL_FMT_FWCBM(width) \ + .bit_widths = width, .filter = 1u, .write = 1u, .color = 1u, .blend = 1u, \ + .msaa = 1u, .resolve = 0u, .sparse = 0u, .atomic = 0u + +// Write, Color, Blend, MSAA, Sparse +#define MTL_FMT_WCBMS(width) \ + .bit_widths = width, .filter = 0u, .write = 1u, .color = 1u, .blend = 1u, \ + .msaa = 1u, .resolve = 0u, .sparse = 0u, .atomic = 0u + +// Write, Color, MSAA, Sparse +#define MTL_FMT_WCMS(width) \ + .bit_widths = width, .filter = 0u, .write = 1u, .color = 1u, .blend = 0u, \ + .msaa = 1u, .resolve = 0u, .sparse = 1u, .atomic = 0u + +// Write, Color, Sparse, Atomic +#define MTL_FMT_WCSA(width) \ + .bit_widths = width, .filter = 0u, .write = 1u, .color = 1u, .blend = 0u, \ + .msaa = 0u, .resolve = 0u, .sparse = 1u, .atomic = 1u + +// Write, Color, Sparse +#define MTL_FMT_WCS(width) \ + .bit_widths = width, .filter = 0u, .write = 1u, .color = 1u, .blend = 0u, \ + .msaa = 0u, .resolve = 0u, .sparse = 1u, .atomic = 0u + +// Filter, MSAA, Resolve +#define MTL_FMT_FMR(width) \ + .bit_widths = width, .filter = 1u, .write = 0u, .color = 0u, .blend = 0u, \ + .msaa = 1u, .resolve = 1u, .sparse = 0u, .atomic = 0u + +// Filter, Sparse +#define MTL_FMT_FS(width) \ + .bit_widths = width, .filter = 1u, .write = 0u, .color = 0u, .blend = 0u, \ + .msaa = 0u, .resolve = 0u, .sparse = 1u, .atomic = 0u + +// MSAA, Resolve +#define MTL_FMT_MR(width) \ + .bit_widths = width, .filter = 0u, .write = 0u, .color = 0u, .blend = 0u, \ + .msaa = 1u, .resolve = 1u, .sparse = 0u, .atomic = 0u + +// MSAA +#define MTL_FMT_M(width) \ + .bit_widths = width, .filter = 0u, .write = 0u, .color = 0u, .blend = 0u, \ + .msaa = 1u, .resolve = 0u, .sparse = 0u, .atomic = 0u + +#define MTL_FMT_TB_ALL \ + .texel_buffer = { \ + .write = 1u, \ + .read = 1u, \ + .read_write = 1u, \ + } + +#define MTL_FMT_TB_WR \ + .texel_buffer = { \ + .write = 1u, \ + .read = 1u, \ + 
.read_write = 0u, \ + } + +#define MTL_FMT_TB_R \ + .texel_buffer = { \ + .write = 0u, \ + .read = 1u, \ + .read_write = 0u, \ + } + +#define MTL_FMT_TB_NONE \ + .texel_buffer = { \ + .write = 0u, \ + .read = 0u, \ + .read_write = 0u, \ + } + +#define MTL_SWIZZLE_IDENTITY \ + .swizzle = { \ + .red = PIPE_SWIZZLE_X, \ + .green = PIPE_SWIZZLE_Y, \ + .blue = PIPE_SWIZZLE_Z, \ + .alpha = PIPE_SWIZZLE_W, \ + } + +#define MTL_SWIZZLE_ABGR \ + .swizzle = { \ + .red = PIPE_SWIZZLE_W, \ + .green = PIPE_SWIZZLE_Z, \ + .blue = PIPE_SWIZZLE_Y, \ + .alpha = PIPE_SWIZZLE_X, \ + } + +#define MTL_SWIZZLE_BGRA \ + .swizzle = { \ + .red = PIPE_SWIZZLE_Z, \ + .green = PIPE_SWIZZLE_Y, \ + .blue = PIPE_SWIZZLE_X, \ + .alpha = PIPE_SWIZZLE_W, \ + } + +#define MTL_FMT(pipe_format, mtl_format, swizzle, capabilities, \ + texel_buffer_capabilities, native) \ + [PIPE_FORMAT_## \ + pipe_format] = {.mtl_pixel_format = MTL_PIXEL_FORMAT_##mtl_format, \ + swizzle, \ + capabilities, \ + texel_buffer_capabilities, \ + .is_native = native} + +#define MTL_FMT_NATIVE(format, capabilities, texel_buffer_capabilities) \ + [PIPE_FORMAT_##format] = {.mtl_pixel_format = MTL_PIXEL_FORMAT_##format, \ + MTL_SWIZZLE_IDENTITY, \ + capabilities, \ + texel_buffer_capabilities, \ + .is_native = 1} + +static const struct kk_va_format kk_vf_formats[] = { + // 8-bit formats + MTL_FMT_NATIVE(R8_UNORM, MTL_FMT_ALL_NO_ATOMIC(8), MTL_FMT_TB_ALL), + MTL_FMT_NATIVE(A8_UNORM, MTL_FMT_ALL_NO_ATOMIC(8), MTL_FMT_TB_ALL), + MTL_FMT_NATIVE(R8_SRGB, MTL_FMT_ALL_NO_ATOMIC(8), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(R8_SNORM, MTL_FMT_ALL_NO_ATOMIC(8), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R8_UINT, MTL_FMT_WCMS(8), MTL_FMT_TB_ALL), + MTL_FMT_NATIVE(R8_SINT, MTL_FMT_WCMS(8), MTL_FMT_TB_ALL), + + // 16-bit formats + MTL_FMT_NATIVE(R16_UNORM, MTL_FMT_FWCBMS(16), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R16_SNORM, MTL_FMT_FWCBMS(16), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R16_UINT, MTL_FMT_WCMS(16), MTL_FMT_TB_ALL), + MTL_FMT_NATIVE(R16_SINT, MTL_FMT_WCMS(16), MTL_FMT_TB_ALL), + MTL_FMT_NATIVE(R16_FLOAT, MTL_FMT_ALL_NO_ATOMIC(16), MTL_FMT_TB_ALL), + MTL_FMT_NATIVE(R8G8_UNORM, MTL_FMT_ALL_NO_ATOMIC(16), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R8G8_SNORM, MTL_FMT_ALL_NO_ATOMIC(16), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R8G8_SRGB, MTL_FMT_ALL_NO_ATOMIC(16), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(R8G8_UINT, MTL_FMT_WCMS(16), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R8G8_SINT, MTL_FMT_WCMS(16), MTL_FMT_TB_WR), + + // 32-bit formats + MTL_FMT_NATIVE(R32_UINT, MTL_FMT_WCSA(32), MTL_FMT_TB_ALL), + MTL_FMT_NATIVE(R32_SINT, MTL_FMT_WCSA(32), MTL_FMT_TB_ALL), + MTL_FMT_NATIVE(R32_FLOAT, MTL_FMT_WCBMS(32), MTL_FMT_TB_ALL), + MTL_FMT_NATIVE(R16G16_UNORM, MTL_FMT_FWCBMS(32), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R16G16_SNORM, MTL_FMT_FWCBMS(32), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R16G16_UINT, MTL_FMT_WCMS(32), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R16G16_SINT, MTL_FMT_WCMS(32), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R16G16_FLOAT, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R8G8B8A8_UNORM, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R8G8B8A8_SNORM, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R8G8B8A8_SRGB, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(R8G8B8A8_UINT, MTL_FMT_WCMS(32), MTL_FMT_TB_ALL), + MTL_FMT_NATIVE(R8G8B8A8_SINT, MTL_FMT_WCMS(32), MTL_FMT_TB_ALL), + MTL_FMT_NATIVE(B8G8R8A8_UNORM, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_R), + MTL_FMT_NATIVE(B8G8R8A8_SRGB, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_NONE), + + // 64-bit formats + MTL_FMT_NATIVE(R32G32_UINT, 
MTL_FMT_WCMS(64), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R32G32_SINT, MTL_FMT_WCMS(64), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R32G32_FLOAT, MTL_FMT_WCBMS(64), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R16G16B16A16_UNORM, MTL_FMT_FWCBMS(64), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R16G16B16A16_SNORM, MTL_FMT_FWCBMS(64), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R16G16B16A16_UINT, MTL_FMT_WCMS(64), MTL_FMT_TB_ALL), + MTL_FMT_NATIVE(R16G16B16A16_SINT, MTL_FMT_WCMS(64), MTL_FMT_TB_ALL), + MTL_FMT_NATIVE(R16G16B16A16_FLOAT, MTL_FMT_ALL_NO_ATOMIC(64), + MTL_FMT_TB_ALL), + + // 128-bit formats + MTL_FMT_NATIVE(R32G32B32A32_UINT, MTL_FMT_WCS(128), MTL_FMT_TB_ALL), + MTL_FMT_NATIVE(R32G32B32A32_SINT, MTL_FMT_WCS(128), MTL_FMT_TB_ALL), + MTL_FMT_NATIVE(R32G32B32A32_FLOAT, MTL_FMT_WCMS(128), MTL_FMT_TB_ALL), + + // 16-bit packed formats + MTL_FMT_NATIVE(B5G6R5_UNORM, MTL_FMT_FCBMRS(16), MTL_FMT_TB_NONE), + /* Hardware has issues with border color opaque black, and since it's not + * required by Vulkan, we can just disable it. + */ + /* MTL_FMT_NATIVE(A1B5G5R5_UNORM, MTL_FMT_FCBMRS(16), MTL_FMT_TB_NONE), */ + MTL_FMT_NATIVE(A4B4G4R4_UNORM, MTL_FMT_FCBMRS(16), MTL_FMT_TB_NONE), + MTL_FMT(R4G4B4A4_UNORM, A4B4G4R4_UNORM, MTL_SWIZZLE_ABGR, MTL_FMT_FCBMRS(16), + MTL_FMT_TB_NONE, false), + MTL_FMT(A4R4G4B4_UNORM, A4B4G4R4_UNORM, MTL_SWIZZLE_BGRA, MTL_FMT_FCBMRS(16), + MTL_FMT_TB_NONE, false), + MTL_FMT_NATIVE(B5G5R5A1_UNORM, MTL_FMT_FCBMRS(16), MTL_FMT_TB_NONE), + + // 32-bit packed formats + MTL_FMT_NATIVE(R10G10B10A2_UNORM, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(B10G10R10A2_UNORM, MTL_FMT_ALL_NO_ATOMIC(32), + MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(R10G10B10A2_UINT, MTL_FMT_WCMS(32), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R11G11B10_FLOAT, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_WR), + MTL_FMT_NATIVE(R9G9B9E5_FLOAT, MTL_FMT_ALL_NO_ATOMIC(32), MTL_FMT_TB_NONE), + + // ASTC formats + MTL_FMT_NATIVE(ASTC_4x4, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_5x4, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_5x5, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_6x5, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_6x6, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_8x5, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_8x6, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_8x8, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_10x5, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_10x6, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_10x8, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_10x10, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_12x10, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_12x12, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + + MTL_FMT_NATIVE(ASTC_4x4_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_5x4_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_5x5_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_6x5_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_6x6_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_8x5_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_8x6_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_8x8_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_10x5_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_10x6_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_10x8_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_10x10_SRGB, MTL_FMT_FS(128), 
MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_12x10_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ASTC_12x12_SRGB, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + + // EAC/ETC formats + MTL_FMT_NATIVE(ETC2_R11_UNORM, MTL_FMT_FS(64), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ETC2_R11_SNORM, MTL_FMT_FS(64), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ETC2_RG11_UNORM, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ETC2_RG11_SNORM, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ETC2_RGBA8, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ETC2_SRGBA8, MTL_FMT_FS(128), MTL_FMT_TB_NONE), + + MTL_FMT_NATIVE(ETC2_RGB8, MTL_FMT_FS(64), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ETC2_SRGB8, MTL_FMT_FS(64), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ETC2_RGB8A1, MTL_FMT_FS(64), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(ETC2_SRGB8A1, MTL_FMT_FS(64), MTL_FMT_TB_NONE), + + // Compressed PVRTC, HDR ASTC, BC TODO_KOSMICKRISP + // YUV formats TODO_KOSMICKRISP + // Extended range and wide color formats TODO_KOSMICKRISP + + // Depth and stencil formats + MTL_FMT_NATIVE(Z16_UNORM, MTL_FMT_FMR(16), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(Z32_FLOAT, MTL_FMT_MR(32), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(S8_UINT, MTL_FMT_M(8), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(Z32_FLOAT_S8X24_UINT, MTL_FMT_MR(64), MTL_FMT_TB_NONE), + MTL_FMT_NATIVE(X32_S8X24_UINT, MTL_FMT_MR(64), MTL_FMT_TB_NONE), +}; + +#undef MTL_FMT_NATIVE +#undef MTL_FMT + +#undef MTL_SWIZZLE_BGRA +#undef MTL_SWIZZLE_ABGR +#undef MTL_SWIZZLE_IDENTITY + +#undef MTL_FMT_ALL_NO_ATOMIC +#undef MTL_FMT_FWCBMS +#undef MTL_FMT_FCBMRS +#undef MTL_FMT_FWCBM +#undef MTL_FMT_WCBMS +#undef MTL_FMT_WCMS +#undef MTL_FMT_WCSA +#undef MTL_FMT_WCS +#undef MTL_FMT_FMR +#undef MTL_FMT_FS +#undef MTL_FMT_MR +#undef MTL_FMT_M + +#undef MTL_FMT_TB_ALL +#undef MTL_FMT_TB_WR +#undef MTL_FMT_TB_R +#undef MTL_FMT_TB_NONE + +const struct kk_va_format * +kk_get_va_format(enum pipe_format format) +{ + if (format >= ARRAY_SIZE(kk_vf_formats)) + return NULL; + + if (kk_vf_formats[format].bit_widths == 0) + return NULL; + + return &kk_vf_formats[format]; +} + +enum mtl_pixel_format +vk_format_to_mtl_pixel_format(VkFormat vkformat) +{ + enum pipe_format format = vk_format_to_pipe_format(vkformat); + const struct kk_va_format *supported_format = kk_get_va_format(format); + assert(supported_format); + return supported_format->mtl_pixel_format; +} + +VKAPI_ATTR void VKAPI_CALL +kk_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice, + VkFormat format, + VkFormatProperties2 *pFormatProperties) +{ + VK_FROM_HANDLE(kk_physical_device, pdevice, physicalDevice); + + VkFormatFeatureFlags2 linear2, optimal2, buffer2; + linear2 = + kk_get_image_format_features(pdevice, format, VK_IMAGE_TILING_LINEAR, 0); + optimal2 = + kk_get_image_format_features(pdevice, format, VK_IMAGE_TILING_OPTIMAL, 0); + buffer2 = kk_get_buffer_format_features(pdevice, format); + + pFormatProperties->formatProperties = (VkFormatProperties){ + .linearTilingFeatures = vk_format_features2_to_features(linear2), + .optimalTilingFeatures = vk_format_features2_to_features(optimal2), + .bufferFeatures = vk_format_features2_to_features(buffer2), + }; + + vk_foreach_struct(ext, pFormatProperties->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3: { + VkFormatProperties3 *p = (void *)ext; + p->linearTilingFeatures = linear2; + p->optimalTilingFeatures = optimal2; + p->bufferFeatures = buffer2; + break; + } + + default: + vk_debug_ignored_stype(ext->sType); + break; + } + } +} diff --git a/src/kosmickrisp/vulkan/kk_format.h 
b/src/kosmickrisp/vulkan/kk_format.h new file mode 100644 index 00000000000..64541a659e5 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_format.h @@ -0,0 +1,55 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_FORMAT_H +#define KK_FORMAT_H 1 + +#include "kk_private.h" + +#include "util/format/u_format.h" + +struct kk_physical_device; +enum pipe_format; +enum mtl_pixel_format; + +struct kk_va_format { + /* Would love to use enum pipe_swizzle, but it's bigger than the required + * type for util_format_compose_swizzles... */ + struct { + union { + struct { + uint8_t red; + uint8_t green; + uint8_t blue; + uint8_t alpha; + }; + uint8_t channels[4]; + }; + } swizzle; + uint32_t mtl_pixel_format; + uint8_t bit_widths; + uint8_t filter : 1; + uint8_t write : 1; + uint8_t color : 1; + uint8_t blend : 1; + uint8_t msaa : 1; + uint8_t resolve : 1; + uint8_t sparse : 1; + uint8_t atomic : 1; + struct { + uint8_t write : 1; + uint8_t read : 1; + uint8_t read_write : 1; + } texel_buffer; + uint8_t is_native : 1; +}; + +const struct kk_va_format *kk_get_va_format(enum pipe_format format); + +enum mtl_pixel_format vk_format_to_mtl_pixel_format(enum VkFormat vkformat); + +#endif /* KK_FORMAT_H */ diff --git a/src/kosmickrisp/vulkan/kk_image.c b/src/kosmickrisp/vulkan/kk_image.c new file mode 100644 index 00000000000..0b05fc82002 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_image.c @@ -0,0 +1,967 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_image.h" + +#include "kk_device.h" +#include "kk_device_memory.h" +#include "kk_entrypoints.h" +#include "kk_format.h" +#include "kk_physical_device.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" + +#include "vk_enum_defines.h" +#include "vk_enum_to_str.h" +#include "vk_format.h" +#include "wsi_common_private.h" + +static VkFormatFeatureFlags2 +kk_get_image_plane_format_features(struct kk_physical_device *pdev, + VkFormat vk_format, VkImageTiling tiling, + uint64_t drm_format_mod) +{ + VkFormatFeatureFlags2 features = 0; + + /* Metal does not support linear tiling for compressed formats */ + if (tiling == VK_IMAGE_TILING_LINEAR && vk_format_is_compressed(vk_format)) + return 0; + + enum pipe_format p_format = vk_format_to_pipe_format(vk_format); + if (p_format == PIPE_FORMAT_NONE) + return 0; + + /* You can't tile a non-power-of-two */ + if (!util_is_power_of_two_nonzero(util_format_get_blocksize(p_format))) + return 0; + + const struct kk_va_format *va_format = kk_get_va_format(p_format); + if (va_format == NULL) + return 0; + + // Textures can at least be sampled + features |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT; + features |= VK_FORMAT_FEATURE_2_BLIT_SRC_BIT; + + if (va_format->filter) { + features |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_LINEAR_BIT; + features |= + VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_MINMAX_BIT; // TODO_KOSMICKRISP + // Understand if + // we want to + // expose this + } + + /* TODO: VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_DEPTH_COMPARISON_BIT */ + if (vk_format_has_depth(vk_format)) { + features |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_DEPTH_COMPARISON_BIT; + } + + /* We disable A8 format due to lower blend pass issues */ + if (va_format->color && tiling != VK_IMAGE_TILING_LINEAR && + vk_format != VK_FORMAT_A8_UNORM) { + features |= VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT; + features |= 
VK_FORMAT_FEATURE_2_BLIT_DST_BIT; + // TODO_KOSMICKRISP Support snorm formats once the following spec issue is + // resolved: https://gitlab.khronos.org/vulkan/vulkan/-/issues/4293 + if (!vk_format_is_snorm(vk_format)) + features |= VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BLEND_BIT; + } + + if (vk_format_is_depth_or_stencil(vk_format)) { + if (tiling == VK_IMAGE_TILING_LINEAR) + return 0; + + features |= VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT; + } + + if (va_format->write) { + features |= VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT; + } + + if (va_format->atomic) + features |= VK_FORMAT_FEATURE_2_STORAGE_IMAGE_ATOMIC_BIT; + + if (features != 0) { + features |= VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT; + features |= VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT; + } + + return features; +} + +VkFormatFeatureFlags2 +kk_get_image_format_features(struct kk_physical_device *pdev, + VkFormat vk_format, VkImageTiling tiling, + uint64_t drm_format_mod) +{ + const struct vk_format_ycbcr_info *ycbcr_info = + vk_format_get_ycbcr_info(vk_format); + if (ycbcr_info == NULL) { + return kk_get_image_plane_format_features(pdev, vk_format, tiling, + drm_format_mod); + } + + /* For multi-plane, we get the feature flags of each plane separately, + * then take their intersection as the overall format feature flags + */ + VkFormatFeatureFlags2 features = ~0ull; + bool cosited_chroma = false; + for (uint8_t plane = 0; plane < ycbcr_info->n_planes; plane++) { + const struct vk_format_ycbcr_plane *plane_info = + &ycbcr_info->planes[plane]; + features &= kk_get_image_plane_format_features(pdev, plane_info->format, + tiling, drm_format_mod); + if (plane_info->denominator_scales[0] > 1 || + plane_info->denominator_scales[1] > 1) + cosited_chroma = true; + } + if (features == 0) + return 0; + + /* Uh... We really should be able to sample from YCbCr */ + assert(features & VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT); + assert(features & VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_FILTER_LINEAR_BIT); + + /* These aren't allowed for YCbCr formats */ + features &= + ~(VK_FORMAT_FEATURE_2_BLIT_SRC_BIT | VK_FORMAT_FEATURE_2_BLIT_DST_BIT | + VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BLEND_BIT | + VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); + + /* This is supported on all YCbCr formats */ + features |= + VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_YCBCR_CONVERSION_LINEAR_FILTER_BIT; + + if (ycbcr_info->n_planes > 1) { + /* DISJOINT_BIT implies that each plane has its own separate binding, + * while SEPARATE_RECONSTRUCTION_FILTER_BIT implies that luma and chroma + * each have their own, separate filters, so these two bits make sense + * for multi-planar formats only. + * + * For MIDPOINT_CHROMA_SAMPLES_BIT, NVIDIA HW on single-plane interleaved + * YCbCr defaults to COSITED_EVEN, which is inaccurate and fails tests. + * This can be fixed with a NIR tweak but for now, we only enable this bit + * for multi-plane formats. See Issue #9525 on the mesa/main tracker. 
+ */ + features |= + VK_FORMAT_FEATURE_DISJOINT_BIT | + VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_YCBCR_CONVERSION_SEPARATE_RECONSTRUCTION_FILTER_BIT | + VK_FORMAT_FEATURE_2_MIDPOINT_CHROMA_SAMPLES_BIT; + } + + if (cosited_chroma) + features |= VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT; + + return features; +} + +static VkFormatFeatureFlags2 +vk_image_usage_to_format_features(VkImageUsageFlagBits usage_flag) +{ + assert(util_bitcount(usage_flag) == 1); + switch (usage_flag) { + case VK_IMAGE_USAGE_TRANSFER_SRC_BIT: + return VK_FORMAT_FEATURE_2_TRANSFER_SRC_BIT | + VK_FORMAT_FEATURE_BLIT_SRC_BIT; + case VK_IMAGE_USAGE_TRANSFER_DST_BIT: + return VK_FORMAT_FEATURE_2_TRANSFER_DST_BIT | + VK_FORMAT_FEATURE_BLIT_DST_BIT; + case VK_IMAGE_USAGE_SAMPLED_BIT: + return VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_BIT; + case VK_IMAGE_USAGE_STORAGE_BIT: + return VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT; + case VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT: + return VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT; + case VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT: + return VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT; + case VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT: + return VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT; + case VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR: + return VK_FORMAT_FEATURE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR; + default: + return 0; + } +} + +uint32_t +kk_image_max_dimension(VkImageType image_type) +{ + /* Values taken from Apple7 + * https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */ + switch (image_type) { + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + return 16384; + case VK_IMAGE_TYPE_3D: + return 2048; + default: + UNREACHABLE("Invalid image type"); + } +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_GetPhysicalDeviceImageFormatProperties2( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo, + VkImageFormatProperties2 *pImageFormatProperties) +{ + VK_FROM_HANDLE(kk_physical_device, pdev, physicalDevice); + + const VkPhysicalDeviceExternalImageFormatInfo *external_info = + vk_find_struct_const(pImageFormatInfo->pNext, + PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO); + + /* Initialize to zero in case we return VK_ERROR_FORMAT_NOT_SUPPORTED */ + memset(&pImageFormatProperties->imageFormatProperties, 0, + sizeof(pImageFormatProperties->imageFormatProperties)); + + /* Metal does not support depth/stencil textures that are not 2D (we make 1D + * textures 2D) */ + if (vk_format_is_depth_or_stencil(pImageFormatInfo->format) && + pImageFormatInfo->type == VK_IMAGE_TYPE_3D) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + /* Metal does not support EAC/ETC formats for 3D textures. */ + if (util_format_is_etc(vk_format_to_pipe_format(pImageFormatInfo->format)) && + pImageFormatInfo->type == VK_IMAGE_TYPE_3D) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + /* Metal disallows reading compressed formats as uncompressed format. + * VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT is only used with + * compressed formats. + */ + if (pImageFormatInfo->flags & + VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + const struct vk_format_ycbcr_info *ycbcr_info = + vk_format_get_ycbcr_info(pImageFormatInfo->format); + + /* For the purposes of these checks, we don't care about all the extra + * YCbCr features and we just want the accumulation of features available + * to all planes of the given format. 
+ */ + VkFormatFeatureFlags2 features; + if (ycbcr_info == NULL) { + features = kk_get_image_plane_format_features( + pdev, pImageFormatInfo->format, pImageFormatInfo->tiling, 0u); + } else { + features = ~0ull; + assert(ycbcr_info->n_planes > 0); + for (uint8_t plane = 0; plane < ycbcr_info->n_planes; plane++) { + const VkFormat plane_format = ycbcr_info->planes[plane].format; + features &= kk_get_image_plane_format_features( + pdev, plane_format, pImageFormatInfo->tiling, 0u); + } + } + + if (features == 0) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if (pImageFormatInfo->tiling == VK_IMAGE_TILING_LINEAR && + pImageFormatInfo->type == VK_IMAGE_TYPE_3D) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + /* TODO_KOSMICKRISP We could allow linear images that are used as render + * target as long as they are not used as input attachments. Main reason for + * this is that we expect arrays when rendering and reading from input + * attachments and Metal disallows arrays for linear textures. + */ + if (pImageFormatInfo->tiling == VK_IMAGE_TILING_LINEAR && + (pImageFormatInfo->usage & + (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT))) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if (ycbcr_info && pImageFormatInfo->type != VK_IMAGE_TYPE_2D) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + /* Don't support sparse residency */ + if ((pImageFormatInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT)) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + /* From the Vulkan 1.3.279 spec: + * + * VUID-VkImageCreateInfo-tiling-04121 + * + * "If tiling is VK_IMAGE_TILING_LINEAR, flags must not contain + * VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT" + * + * VUID-VkImageCreateInfo-imageType-00970 + * + * "If imageType is VK_IMAGE_TYPE_1D, flags must not contain + * VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT" + */ + if ((pImageFormatInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) && + (pImageFormatInfo->type == VK_IMAGE_TYPE_1D || + pImageFormatInfo->tiling == VK_IMAGE_TILING_LINEAR)) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + /* From the Vulkan 1.3.279 spec: + * + * VUID-VkImageCreateInfo-flags-09403 + * + * "If flags contains VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT, flags + * must not include VK_IMAGE_CREATE_SPARSE_ALIASED_BIT, + * VK_IMAGE_CREATE_SPARSE_BINDING_BIT, or + * VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT" + */ + if ((pImageFormatInfo->flags & VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT) && + (pImageFormatInfo->flags & (VK_IMAGE_CREATE_SPARSE_ALIASED_BIT | + VK_IMAGE_CREATE_SPARSE_BINDING_BIT | + VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT))) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if (pImageFormatInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && + pImageFormatInfo->type != VK_IMAGE_TYPE_2D) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + const uint32_t max_dim = kk_image_max_dimension(pImageFormatInfo->type); + assert(util_is_power_of_two_nonzero(max_dim)); + uint32_t maxMipLevels = util_logbase2(max_dim) + 1; + VkExtent3D maxExtent; + uint32_t maxArraySize; + switch (pImageFormatInfo->type) { + case VK_IMAGE_TYPE_1D: + maxExtent = (VkExtent3D){max_dim, 1, 1}; + maxArraySize = 2048u; + break; + case VK_IMAGE_TYPE_2D: + maxExtent = (VkExtent3D){max_dim, max_dim, 1}; + maxArraySize = 2048u; + break; + case VK_IMAGE_TYPE_3D: + maxExtent = (VkExtent3D){max_dim, max_dim, max_dim}; + maxArraySize = 1u; + break; + default: + UNREACHABLE("Invalid image type"); + } + if (pImageFormatInfo->tiling == VK_IMAGE_TILING_LINEAR) + maxArraySize = 1; + + if (ycbcr_info != NULL || pImageFormatInfo->tiling == 
VK_IMAGE_TILING_LINEAR) + maxMipLevels = 1; + + if (pImageFormatInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) { + maxArraySize = 1; + maxMipLevels = 1; + } + + VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT; + if (pImageFormatInfo->tiling == VK_IMAGE_TILING_OPTIMAL && + pImageFormatInfo->type == VK_IMAGE_TYPE_2D && ycbcr_info == NULL && + (features & (VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT)) && + !(pImageFormatInfo->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)) { + sampleCounts = + VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | + // TODO_KOSMICKRISP Modify sample count based on what pdev supports + VK_SAMPLE_COUNT_4_BIT /* | + VK_SAMPLE_COUNT_8_BIT */ + ; + } + + /* From the Vulkan 1.2.199 spec: + * + * "VK_IMAGE_CREATE_EXTENDED_USAGE_BIT specifies that the image can be + * created with usage flags that are not supported for the format the + * image is created with but are supported for at least one format a + * VkImageView created from the image can have." + * + * If VK_IMAGE_CREATE_EXTENDED_USAGE_BIT is set, views can be created with + * different usage than the image so we can't always filter on usage. + * There is one exception to this below for storage. + */ + const VkImageUsageFlags image_usage = pImageFormatInfo->usage; + VkImageUsageFlags view_usage = image_usage; + if (pImageFormatInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) + view_usage = 0; + + u_foreach_bit(b, view_usage) { + VkFormatFeatureFlags2 usage_features = + vk_image_usage_to_format_features(1 << b); + if (usage_features && !(features & usage_features)) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + } + + const VkExternalMemoryProperties *ext_mem_props = NULL; + if (external_info != NULL && external_info->handleType != 0) { + /* We only support heaps since that's the backing for all our memory and + * simplifies implementation */ + switch (external_info->handleType) { + case VK_EXTERNAL_MEMORY_HANDLE_TYPE_MTLHEAP_BIT_EXT: + ext_mem_props = &kk_mtlheap_mem_props; + break; + default: + /* From the Vulkan 1.3.256 spec: + * + * "If handleType is not compatible with the [parameters] in + * VkPhysicalDeviceImageFormatInfo2, then + * vkGetPhysicalDeviceImageFormatProperties2 returns + * VK_ERROR_FORMAT_NOT_SUPPORTED." + */ + return vk_errorf(pdev, VK_ERROR_FORMAT_NOT_SUPPORTED, + "unsupported VkExternalMemoryHandleTypeFlagBits: %s ", + vk_ExternalMemoryHandleTypeFlagBits_to_str( + external_info->handleType)); + } + } + + const unsigned plane_count = + vk_format_get_plane_count(pImageFormatInfo->format); + + /* From the Vulkan 1.3.259 spec, VkImageCreateInfo: + * + * VUID-VkImageCreateInfo-imageCreateFormatFeatures-02260 + * + * "If format is a multi-planar format, and if imageCreateFormatFeatures + * (as defined in Image Creation Limits) does not contain + * VK_FORMAT_FEATURE_DISJOINT_BIT, then flags must not contain + * VK_IMAGE_CREATE_DISJOINT_BIT" + * + * This is satisfied trivially because we support DISJOINT on all + * multi-plane formats. 
Also, + * + * VUID-VkImageCreateInfo-format-01577 + * + * "If format is not a multi-planar format, and flags does not include + * VK_IMAGE_CREATE_ALIAS_BIT, flags must not contain + * VK_IMAGE_CREATE_DISJOINT_BIT" + */ + if (plane_count == 1 && + !(pImageFormatInfo->flags & VK_IMAGE_CREATE_ALIAS_BIT) && + (pImageFormatInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT)) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if (ycbcr_info && + ((pImageFormatInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) || + (pImageFormatInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT))) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + if ((pImageFormatInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && + (pImageFormatInfo->usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT)) + return VK_ERROR_FORMAT_NOT_SUPPORTED; + + pImageFormatProperties->imageFormatProperties = (VkImageFormatProperties){ + .maxExtent = maxExtent, + .maxMipLevels = maxMipLevels, + .maxArrayLayers = maxArraySize, + .sampleCounts = sampleCounts, + .maxResourceSize = UINT32_MAX, /* TODO */ + }; + + vk_foreach_struct(s, pImageFormatProperties->pNext) { + switch (s->sType) { + case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES: { + VkExternalImageFormatProperties *p = (void *)s; + /* From the Vulkan 1.3.256 spec: + * + * "If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 + * will behave as if VkPhysicalDeviceExternalImageFormatInfo was + * not present, and VkExternalImageFormatProperties will be + * ignored." + * + * This is true if and only if ext_mem_props == NULL + */ + if (ext_mem_props != NULL) + p->externalMemoryProperties = *ext_mem_props; + break; + } + case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES: { + VkSamplerYcbcrConversionImageFormatProperties *ycbcr_props = (void *)s; + ycbcr_props->combinedImageSamplerDescriptorCount = plane_count; + break; + } + case VK_STRUCTURE_TYPE_HOST_IMAGE_COPY_DEVICE_PERFORMANCE_QUERY_EXT: { + VkHostImageCopyDevicePerformanceQueryEXT *host_props = (void *)s; + host_props->optimalDeviceAccess = true; + host_props->identicalMemoryLayout = true; + break; + } + default: + vk_debug_ignored_stype(s->sType); + break; + } + } + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +kk_GetPhysicalDeviceSparseImageFormatProperties2( + VkPhysicalDevice physicalDevice, + const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo, + uint32_t *pPropertyCount, VkSparseImageFormatProperties2 *pProperties) +{ + *pPropertyCount = 0; + return; +} + +static VkResult +kk_image_init(struct kk_device *dev, struct kk_image *image, + const VkImageCreateInfo *pCreateInfo) +{ + vk_image_init(&dev->vk, &image->vk, pCreateInfo); + + if ((image->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) && + image->vk.samples > 1) { + image->vk.usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + image->vk.stencil_usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + } + + if (image->vk.usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) + image->vk.usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + if (image->vk.usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { + if (util_format_is_depth_or_stencil( + vk_format_to_pipe_format(image->vk.format))) { + image->vk.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + image->vk.stencil_usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; + } else { + image->vk.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + } + } + + image->plane_count = vk_format_get_plane_count(pCreateInfo->format); + image->disjoint = image->plane_count > 1 && + (pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT); 
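For context, the disjoint flag computed just above is what later routes vkBindImageMemory2 through the per-plane branch of kk_bind_image_memory (later in this file). A minimal application-side sketch of that pattern, assuming the VkDevice and the two per-plane VkDeviceMemory allocations already exist and omitting the per-plane memory-requirement queries:

#include <vulkan/vulkan.h>

/* Illustration only, not part of this patch: create a disjoint two-plane
 * YCbCr image and bind each plane to its own allocation. */
static VkResult
bind_disjoint_2plane_image(VkDevice device, VkDeviceMemory mem_plane0,
                           VkDeviceMemory mem_plane1, VkImage *out_image)
{
   const VkImageCreateInfo ici = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
      .flags = VK_IMAGE_CREATE_DISJOINT_BIT,        /* plane_count > 1 + this bit */
      .imageType = VK_IMAGE_TYPE_2D,
      .format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, /* 2-plane YCbCr */
      .extent = {1920, 1080, 1},
      .mipLevels = 1,
      .arrayLayers = 1,
      .samples = VK_SAMPLE_COUNT_1_BIT,
      .tiling = VK_IMAGE_TILING_OPTIMAL,
      .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
   };
   VkResult result = vkCreateImage(device, &ici, NULL, out_image);
   if (result != VK_SUCCESS)
      return result;

   const VkBindImagePlaneMemoryInfo planes[2] = {
      {.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO,
       .planeAspect = VK_IMAGE_ASPECT_PLANE_0_BIT},
      {.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO,
       .planeAspect = VK_IMAGE_ASPECT_PLANE_1_BIT},
   };
   const VkBindImageMemoryInfo binds[2] = {
      {.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
       .pNext = &planes[0], .image = *out_image, .memory = mem_plane0},
      {.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
       .pNext = &planes[1], .image = *out_image, .memory = mem_plane1},
   };
   return vkBindImageMemory2(device, 2, binds);
}

Each VkBindImageMemoryInfo element carries exactly one plane aspect, which is why the disjoint path of kk_bind_image_memory binds a single kk_image_plane per call.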
+ + const struct vk_format_ycbcr_info *ycbcr_info = + vk_format_get_ycbcr_info(pCreateInfo->format); + for (uint8_t plane = 0; plane < image->plane_count; plane++) { + VkFormat format = + ycbcr_info ? ycbcr_info->planes[plane].format : pCreateInfo->format; + const uint8_t width_scale = + ycbcr_info ? ycbcr_info->planes[plane].denominator_scales[0] : 1; + const uint8_t height_scale = + ycbcr_info ? ycbcr_info->planes[plane].denominator_scales[1] : 1; + kk_image_layout_init(dev, pCreateInfo, vk_format_to_pipe_format(format), + width_scale, height_scale, + &image->planes[plane].layout); + } + + if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { + kk_image_layout_init(dev, pCreateInfo, PIPE_FORMAT_R32_UINT, 1, 1, + &image->stencil_copy_temp.layout); + } + + return VK_SUCCESS; +} + +static void +kk_image_plane_size_align_B(struct kk_device *dev, const struct kk_image *image, + const struct kk_image_plane *plane, + uint64_t *size_B_out, uint64_t *align_B_out) +{ + *size_B_out = plane->layout.size_B; + *align_B_out = plane->layout.align_B; +} + +static void +kk_image_plane_finish(struct kk_device *dev, struct kk_image_plane *plane, + VkImageCreateFlags create_flags, + const VkAllocationCallbacks *pAllocator) +{ + if (plane->mtl_handle != NULL) + mtl_release(plane->mtl_handle); + if (plane->mtl_handle_array != NULL) + mtl_release(plane->mtl_handle_array); +} + +static void +kk_image_finish(struct kk_device *dev, struct kk_image *image, + const VkAllocationCallbacks *pAllocator) +{ + for (uint8_t plane = 0; plane < image->plane_count; plane++) { + kk_image_plane_finish(dev, &image->planes[plane], image->vk.create_flags, + pAllocator); + } + + if (image->stencil_copy_temp.layout.size_B > 0) { + kk_image_plane_finish(dev, &image->stencil_copy_temp, + image->vk.create_flags, pAllocator); + } + + vk_image_finish(&image->vk); +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_CreateImage(VkDevice _device, const VkImageCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkImage *pImage) +{ + VK_FROM_HANDLE(kk_device, dev, _device); + struct kk_physical_device *pdev = kk_device_physical(dev); + struct kk_image *image; + VkResult result; + +#ifdef KK_USE_WSI_PLATFORM + /* Ignore swapchain creation info on Android. Since we don't have an + * implementation in Mesa, we're guaranteed to access an Android object + * incorrectly. 
+ */ + const VkImageSwapchainCreateInfoKHR *swapchain_info = + vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR); + if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) { + return wsi_common_create_swapchain_image( + &pdev->wsi_device, pCreateInfo, swapchain_info->swapchain, pImage); + } +#endif + + image = vk_zalloc2(&dev->vk.alloc, pAllocator, sizeof(*image), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!image) + return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + + result = kk_image_init(dev, image, pCreateInfo); + if (result != VK_SUCCESS) { + vk_free2(&dev->vk.alloc, pAllocator, image); + return result; + } + + *pImage = kk_image_to_handle(image); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +kk_DestroyImage(VkDevice device, VkImage _image, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VK_FROM_HANDLE(kk_image, image, _image); + + if (!image) + return; + + kk_image_finish(dev, image, pAllocator); + vk_free2(&dev->vk.alloc, pAllocator, image); +} + +static void +kk_image_plane_add_req(struct kk_device *dev, const struct kk_image *image, + const struct kk_image_plane *plane, uint64_t *size_B, + uint32_t *align_B) +{ + assert(util_is_power_of_two_or_zero64(*align_B)); + uint64_t plane_size_B, plane_align_B; + kk_image_plane_size_align_B(dev, image, plane, &plane_size_B, + &plane_align_B); + + *align_B = MAX2(*align_B, plane_align_B); + *size_B = align64(*size_B, plane_align_B); + *size_B += plane_size_B; +} + +static void +kk_get_image_memory_requirements(struct kk_device *dev, struct kk_image *image, + VkImageAspectFlags aspects, + VkMemoryRequirements2 *pMemoryRequirements) +{ + struct kk_physical_device *pdev = kk_device_physical(dev); + uint32_t memory_types = (1 << pdev->mem_type_count) - 1; + + /* Remove non host visible heaps from the types for host image copy in case + * of potential issues. This should be removed when we get ReBAR. + */ + if (image->vk.usage & VK_IMAGE_USAGE_HOST_TRANSFER_BIT_EXT) { + for (uint32_t i = 0; i < pdev->mem_type_count; i++) { + if (!(pdev->mem_types[i].propertyFlags & + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) + memory_types &= ~BITFIELD_BIT(i); + } + } + + // TODO hope for the best? 
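The accumulation just below packs every plane (plus the stencil_copy_temp plane when present) into a single size/alignment pair. A worked example with two hypothetical planes whose Metal-reported sizes and alignments are 70000/16384 and 20000/16384 bytes:

   plane 0: size_B = 70000, align_B = 16384
   plane 1: size_B = 20000, align_B = 16384

   kk_image_plane_add_req(plane 0): size_B  = align64(0, 16384) + 70000 =  70000
                                    align_B = MAX2(0, 16384)            =  16384
   kk_image_plane_add_req(plane 1): size_B  = align64(70000, 16384) + 20000
                                            = 81920 + 20000             = 101920
                                    align_B = MAX2(16384, 16384)        =  16384

   reported: memoryRequirements.size = 101920, .alignment = 16384

kk_image_plane_bind and kk_get_image_subresource_layout run the same accumulation, so plane 1 of a non-disjoint binding lands at offset 81920 in all three places.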
+ + uint64_t size_B = 0; + uint32_t align_B = 0; + if (image->disjoint) { + uint8_t plane = kk_image_memory_aspects_to_plane(image, aspects); + kk_image_plane_add_req(dev, image, &image->planes[plane], &size_B, + &align_B); + } else { + for (unsigned plane = 0; plane < image->plane_count; plane++) { + kk_image_plane_add_req(dev, image, &image->planes[plane], &size_B, + &align_B); + } + } + + if (image->stencil_copy_temp.layout.size_B > 0) { + kk_image_plane_add_req(dev, image, &image->stencil_copy_temp, &size_B, + &align_B); + } + + pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types; + pMemoryRequirements->memoryRequirements.alignment = align_B; + pMemoryRequirements->memoryRequirements.size = size_B; + + vk_foreach_struct_const(ext, pMemoryRequirements->pNext) { + switch (ext->sType) { + case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: { + VkMemoryDedicatedRequirements *dedicated = (void *)ext; + dedicated->prefersDedicatedAllocation = + image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT; + dedicated->requiresDedicatedAllocation = + image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT; + break; + } + default: + vk_debug_ignored_stype(ext->sType); + break; + } + } +} + +VKAPI_ATTR void VKAPI_CALL +kk_GetImageMemoryRequirements2(VkDevice device, + const VkImageMemoryRequirementsInfo2 *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VK_FROM_HANDLE(kk_image, image, pInfo->image); + + const VkImagePlaneMemoryRequirementsInfo *plane_info = + vk_find_struct_const(pInfo->pNext, IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO); + const VkImageAspectFlags aspects = + image->disjoint ? plane_info->planeAspect : image->vk.aspects; + + kk_get_image_memory_requirements(dev, image, aspects, pMemoryRequirements); +} + +VKAPI_ATTR void VKAPI_CALL +kk_GetDeviceImageMemoryRequirements(VkDevice device, + const VkDeviceImageMemoryRequirements *pInfo, + VkMemoryRequirements2 *pMemoryRequirements) +{ + VK_FROM_HANDLE(kk_device, dev, device); + ASSERTED VkResult result; + struct kk_image image = {0}; + + result = kk_image_init(dev, &image, pInfo->pCreateInfo); + assert(result == VK_SUCCESS); + + const VkImageAspectFlags aspects = + image.disjoint ? 
pInfo->planeAspect : image.vk.aspects; + + kk_get_image_memory_requirements(dev, &image, aspects, pMemoryRequirements); + + kk_image_finish(dev, &image, NULL); +} + +VKAPI_ATTR void VKAPI_CALL +kk_GetImageSparseMemoryRequirements2( + VkDevice device, const VkImageSparseMemoryRequirementsInfo2 *pInfo, + uint32_t *pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements) +{ + *pSparseMemoryRequirementCount = 0u; +} + +VKAPI_ATTR void VKAPI_CALL +kk_GetDeviceImageSparseMemoryRequirements( + VkDevice device, const VkDeviceImageMemoryRequirements *pInfo, + uint32_t *pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements) +{ + *pSparseMemoryRequirementCount = 0u; +} + +static void +kk_get_image_subresource_layout(struct kk_device *dev, struct kk_image *image, + const VkImageSubresource2KHR *pSubresource, + VkSubresourceLayout2KHR *pLayout) +{ + const VkImageSubresource *isr = &pSubresource->imageSubresource; + + const uint8_t p = kk_image_memory_aspects_to_plane(image, isr->aspectMask); + const struct kk_image_plane *plane = &image->planes[p]; + + uint64_t offset_B = 0; + if (!image->disjoint) { + uint32_t align_B = 0; + for (unsigned i = 0; i < p; i++) { + kk_image_plane_add_req(dev, image, &image->planes[i], &offset_B, + &align_B); + } + } + + pLayout->subresourceLayout = (VkSubresourceLayout){ + .offset = offset_B, + .size = plane->layout.size_B, + .rowPitch = plane->layout.linear_stride_B, + .arrayPitch = plane->layout.layer_stride_B, + .depthPitch = 1u, + }; +} + +VKAPI_ATTR void VKAPI_CALL +kk_GetImageSubresourceLayout2KHR(VkDevice device, VkImage _image, + const VkImageSubresource2KHR *pSubresource, + VkSubresourceLayout2KHR *pLayout) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VK_FROM_HANDLE(kk_image, image, _image); + + kk_get_image_subresource_layout(dev, image, pSubresource, pLayout); +} + +VKAPI_ATTR void VKAPI_CALL +kk_GetDeviceImageSubresourceLayoutKHR( + VkDevice device, const VkDeviceImageSubresourceInfoKHR *pInfo, + VkSubresourceLayout2KHR *pLayout) +{ + VK_FROM_HANDLE(kk_device, dev, device); + ASSERTED VkResult result; + struct kk_image image = {0}; + + result = kk_image_init(dev, &image, pInfo->pCreateInfo); + assert(result == VK_SUCCESS); + + kk_get_image_subresource_layout(dev, &image, pInfo->pSubresource, pLayout); + + kk_image_finish(dev, &image, NULL); +} + +static VkResult +kk_image_plane_bind(struct kk_device *dev, struct kk_image *image, + struct kk_image_plane *plane, struct kk_device_memory *mem, + uint64_t *offset_B) +{ + uint64_t plane_size_B, plane_align_B; + kk_image_plane_size_align_B(dev, image, plane, &plane_size_B, + &plane_align_B); + *offset_B = align64(*offset_B, plane_align_B); + + /* Linear textures in Metal need to be allocated through a buffer... */ + if (plane->layout.optimized_layout) + plane->mtl_handle = mtl_new_texture_with_descriptor( + mem->bo->mtl_handle, &plane->layout, *offset_B); + else + plane->mtl_handle = mtl_new_texture_with_descriptor_linear( + mem->bo->map, &plane->layout, *offset_B); + plane->addr = mem->bo->gpu + *offset_B; + + /* Create auxiliary 2D array texture for 3D images so we can use 2D views of + * it */ + if (plane->layout.type == MTL_TEXTURE_TYPE_3D && + (image->vk.create_flags & VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT)) { + struct kk_image_layout array_layout = plane->layout; + array_layout.type = MTL_TEXTURE_TYPE_2D_ARRAY; + // TODO_KOSMICKRISP We need to make sure that this doesn't go over Metal's + // layer maximum which is 2048. 
Probably by limiting the dimensions and + // layers for 3D images + array_layout.layers = array_layout.layers * array_layout.depth_px; + array_layout.depth_px = 1u; + plane->mtl_handle_array = mtl_new_texture_with_descriptor( + mem->bo->mtl_handle, &array_layout, *offset_B); + } + + *offset_B += plane_size_B; + + return VK_SUCCESS; +} + +static VkResult +kk_bind_image_memory(struct kk_device *dev, const VkBindImageMemoryInfo *info) +{ + VK_FROM_HANDLE(kk_device_memory, mem, info->memory); + VK_FROM_HANDLE(kk_image, image, info->image); + VkResult result; + + /* Ignore this struct on Android, we cannot access swapchain structures + * there. */ +#ifdef KK_USE_WSI_PLATFORM + const VkBindImageMemorySwapchainInfoKHR *swapchain_info = + vk_find_struct_const(info->pNext, BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR); + + if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) { + VK_FROM_HANDLE(wsi_swapchain, swapchain, swapchain_info->swapchain); + VkImage _wsi_image = + swapchain->get_wsi_image(swapchain, swapchain_info->imageIndex)->image; + VK_FROM_HANDLE(kk_image, wsi_img, _wsi_image); + + assert(image->plane_count == 1); + assert(wsi_img->plane_count == 1); + + struct kk_image_plane *plane = &image->planes[0]; + struct kk_image_plane *swapchain_plane = &wsi_img->planes[0]; + + /* Copy swapchain plane data retaining relevant resources. */ + plane->layout = swapchain_plane->layout; + plane->mtl_handle = mtl_retain(swapchain_plane->mtl_handle); + plane->mtl_handle_array = + swapchain_plane->mtl_handle_array + ? mtl_retain(swapchain_plane->mtl_handle_array) + : NULL; + plane->addr = swapchain_plane->addr; + + return VK_SUCCESS; + } +#endif + + uint64_t offset_B = info->memoryOffset; + if (image->disjoint) { + const VkBindImagePlaneMemoryInfo *plane_info = + vk_find_struct_const(info->pNext, BIND_IMAGE_PLANE_MEMORY_INFO); + const uint8_t plane = + kk_image_memory_aspects_to_plane(image, plane_info->planeAspect); + result = + kk_image_plane_bind(dev, image, &image->planes[plane], mem, &offset_B); + if (result != VK_SUCCESS) + return result; + } else { + for (unsigned plane = 0; plane < image->plane_count; plane++) { + result = kk_image_plane_bind(dev, image, &image->planes[plane], mem, + &offset_B); + if (result != VK_SUCCESS) + return result; + } + } + + if (image->stencil_copy_temp.layout.size_B > 0) { + result = kk_image_plane_bind(dev, image, &image->stencil_copy_temp, mem, + &offset_B); + if (result != VK_SUCCESS) + return result; + } + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_BindImageMemory2(VkDevice device, uint32_t bindInfoCount, + const VkBindImageMemoryInfo *pBindInfos) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VkResult first_error_or_success = VK_SUCCESS; + + for (uint32_t i = 0; i < bindInfoCount; ++i) { + VkResult result = kk_bind_image_memory(dev, &pBindInfos[i]); + + const VkBindMemoryStatusKHR *status = + vk_find_struct_const(pBindInfos[i].pNext, BIND_MEMORY_STATUS_KHR); + if (status != NULL && status->pResult != NULL) + *status->pResult = VK_SUCCESS; + + if (first_error_or_success == VK_SUCCESS) + first_error_or_success = result; + } + + return first_error_or_success; +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_GetImageOpaqueCaptureDescriptorDataEXT( + VkDevice _device, const VkImageCaptureDescriptorDataInfoEXT *pInfo, + void *pData) +{ + return VK_SUCCESS; +} diff --git a/src/kosmickrisp/vulkan/kk_image.h b/src/kosmickrisp/vulkan/kk_image.h new file mode 100644 index 00000000000..7ef11db3133 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_image.h @@ -0,0 
+1,155 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_IMAGE_H +#define KK_IMAGE_H 1 + +#include "kk_private.h" + +#include "kk_device_memory.h" +#include "kk_image_layout.h" + +#include "kosmickrisp/bridge/mtl_types.h" + +#include "vk_image.h" + +/* Because small images can end up with an array_stride_B that is less than + * the sparse block size (in bytes), we have to set SINGLE_MIPTAIL_BIT when + * advertising sparse properties to the client. This means that we get one + * single memory range for the miptail of the image. For large images with + * mipTailStartLod > 0, we have to deal with the array stride ourselves. + * + * We do this by returning NVK_MIP_TAIL_START_OFFSET as the image's + * imageMipTailOffset. We can then detect anything with that address as + * being part of the miptail and re-map it accordingly. The Vulkan spec + * explicitly allows for this. + * + * From the Vulkan 1.3.279 spec: + * + * "When VK_SPARSE_MEMORY_BIND_METADATA_BIT is present, the resourceOffset + * must have been derived explicitly from the imageMipTailOffset in the + * sparse resource properties returned for the metadata aspect. By + * manipulating the value returned for imageMipTailOffset, the + * resourceOffset does not have to correlate directly to a device virtual + * address offset, and may instead be whatever value makes it easiest for + * the implementation to derive the correct device virtual address." + */ +#define NVK_MIP_TAIL_START_OFFSET 0x6d74000000000000UL + +struct kk_device_memory; +struct kk_physical_device; +struct kk_queue; + +VkFormatFeatureFlags2 +kk_get_image_format_features(struct kk_physical_device *pdevice, + VkFormat format, VkImageTiling tiling, + uint64_t drm_format_mod); + +uint32_t kk_image_max_dimension(VkImageType image_type); + +struct kk_image_plane { + struct kk_image_layout layout; + // TODO_KOSMICKRISP Only have one handle since we will only create 2D arrays + // anyway + /* Metal handle with original handle type */ + mtl_texture *mtl_handle; + /* Metal handle with 2D array type for 3D images */ + mtl_texture *mtl_handle_array; + uint64_t addr; +}; + +struct kk_image { + struct vk_image vk; + + /** True if the planes are bound separately + * * This is set based on VK_IMAGE_CREATE_DISJOINT_BIT + */ + bool disjoint; + + uint8_t plane_count; + struct kk_image_plane planes[3]; + + /* In order to support D32_SFLOAT_S8_UINT, a temp area is + * needed. The stencil plane can't be a copied using the DMA + * engine in a single pass since it would need 8 components support. + * Instead we allocate a 16-bit temp, that gets copied into, then + * copied again down to the 8-bit result. 
+ */ + struct kk_image_plane stencil_copy_temp; +}; + +static inline mtl_resource * +kk_image_to_mtl_resource(const struct kk_image *image, int plane) +{ + if (image != NULL) { + assert(plane < ARRAY_SIZE(image->planes)); + return (mtl_resource *)image->planes[plane].mtl_handle; + } + return NULL; +} + +VK_DEFINE_NONDISP_HANDLE_CASTS(kk_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE) + +static inline uint64_t +kk_image_plane_base_address(const struct kk_image_plane *plane) +{ + return plane->addr; +} + +static inline uint64_t +kk_image_base_address(const struct kk_image *image, uint8_t plane) +{ + return kk_image_plane_base_address(&image->planes[plane]); +} + +static inline uint8_t +kk_image_aspects_to_plane(ASSERTED const struct kk_image *image, + VkImageAspectFlags aspectMask) +{ + /* Memory planes are only allowed for memory operations */ + assert(!(aspectMask & (VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT | + VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT | + VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT | + VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT))); + + /* Verify that the aspects are actually in the image */ + assert(!(aspectMask & ~image->vk.aspects)); + + /* Must only be one aspect unless it's depth/stencil */ + assert(aspectMask == + (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) || + util_bitcount(aspectMask) == 1); + + switch (aspectMask) { + case VK_IMAGE_ASPECT_PLANE_1_BIT: + return 1; + case VK_IMAGE_ASPECT_PLANE_2_BIT: + return 2; + default: + return 0; + } +} + +static inline uint8_t +kk_image_memory_aspects_to_plane(ASSERTED const struct kk_image *image, + VkImageAspectFlags aspectMask) +{ + if (aspectMask & (VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT | + VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT | + VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT | + VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT)) { + /* We don't support DRM format modifiers on anything but single-plane + * color at the moment. + */ + assert(aspectMask == VK_IMAGE_ASPECT_MEMORY_PLANE_0_BIT_EXT); + return 0; + } else { + return kk_image_aspects_to_plane(image, aspectMask); + } +} + +#endif diff --git a/src/kosmickrisp/vulkan/kk_image_layout.c b/src/kosmickrisp/vulkan/kk_image_layout.c new file mode 100644 index 00000000000..06d4ed8754a --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_image_layout.c @@ -0,0 +1,124 @@ +/* + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_image_layout.h" + +#include "kk_device.h" +#include "kk_format.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" +#include "kosmickrisp/bridge/mtl_format.h" + +#include "util/format/u_format.h" + +static enum mtl_texture_type +vk_image_create_info_to_mtl_texture_type( + const struct VkImageCreateInfo *create_info) +{ + uint32_t array_layers = create_info->arrayLayers; + uint32_t samples = create_info->samples; + switch (create_info->imageType) { + case VK_IMAGE_TYPE_1D: + case VK_IMAGE_TYPE_2D: + /* We require input attachments to be arrays */ + if (array_layers > 1 || + (create_info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) + return samples > 1u ? MTL_TEXTURE_TYPE_2D_ARRAY_MULTISAMPLE + : MTL_TEXTURE_TYPE_2D_ARRAY; + return samples > 1u ? 
MTL_TEXTURE_TYPE_2D_MULTISAMPLE + : MTL_TEXTURE_TYPE_2D; + case VK_IMAGE_TYPE_3D: + return MTL_TEXTURE_TYPE_3D; + default: + UNREACHABLE("Invalid image type"); + return MTL_TEXTURE_TYPE_1D; /* Just return a type we don't actually use */ + } +} + +static enum mtl_texture_usage +vk_image_usage_flags_to_mtl_texture_usage(VkImageUsageFlags usage_flags, + VkImageCreateFlags create_flags, + bool supports_atomics) +{ + enum mtl_texture_usage usage = 0u; + + const VkImageUsageFlags shader_write = + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT; + if (usage_flags & shader_write) + usage |= MTL_TEXTURE_USAGE_SHADER_WRITE; + + const VkImageUsageFlags shader_read = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT; + if (usage_flags & shader_read) + usage |= MTL_TEXTURE_USAGE_SHADER_READ; + + const VkImageUsageFlags render_attachment = + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT; + + if (usage_flags & render_attachment) + usage |= MTL_TEXTURE_USAGE_RENDER_TARGET; + + if (create_flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) + usage |= MTL_TEXTURE_USAGE_PIXEL_FORMAT_VIEW; + + if (supports_atomics) { + usage |= MTL_TEXTURE_USAGE_SHADER_READ; + usage |= MTL_TEXTURE_USAGE_SHADER_WRITE; + usage |= MTL_TEXTURE_USAGE_SHADER_ATOMIC; + } + + return usage; +} + +void +kk_image_layout_init(const struct kk_device *dev, + const struct VkImageCreateInfo *create_info, + enum pipe_format format, const uint8_t width_scale, + const uint8_t height_scale, struct kk_image_layout *layout) +{ + const struct kk_va_format *supported_format = kk_get_va_format(format); + layout->type = vk_image_create_info_to_mtl_texture_type(create_info); + layout->width_px = create_info->extent.width / width_scale; + layout->height_px = create_info->extent.height / height_scale; + layout->depth_px = create_info->extent.depth; + layout->layers = create_info->arrayLayers; + layout->levels = create_info->mipLevels; + layout->optimized_layout = create_info->tiling == VK_IMAGE_TILING_OPTIMAL; + layout->usage = vk_image_usage_flags_to_mtl_texture_usage( + create_info->usage, create_info->flags, supported_format->atomic); + layout->format.pipe = format; + layout->format.mtl = supported_format->mtl_pixel_format; + layout->swizzle.red = supported_format->swizzle.red; + layout->swizzle.green = supported_format->swizzle.green; + layout->swizzle.blue = supported_format->swizzle.blue; + layout->swizzle.alpha = supported_format->swizzle.alpha; + layout->sample_count_sa = create_info->samples; + mtl_heap_texture_size_and_align_with_descriptor(dev->mtl_handle, layout); + + /* + * Metal requires adding MTL_TEXTURE_USAGE_PIXEL_FORMAT_VIEW if we are going + * to reinterpret the format with a different format. This seems to be the + * only format with this issue. + */ + if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { + layout->usage |= MTL_TEXTURE_USAGE_PIXEL_FORMAT_VIEW; + } + + // TODO_KOSMICKRISP Fill remaining offsets and strides whenever possible + if (create_info->tiling == VK_IMAGE_TILING_LINEAR) { + const struct util_format_description *format_desc = + util_format_description(layout->format.pipe); + size_t bytes_per_texel = format_desc->block.bits / 8; + layout->linear_stride_B = + align(bytes_per_texel * layout->width_px, layout->align_B); + layout->layer_stride_B = layout->linear_stride_B * layout->height_px; + /* Metal only allows for 2D texture with no mipmapping. 
*/ + layout->size_B = layout->layer_stride_B; + } +} diff --git a/src/kosmickrisp/vulkan/kk_image_layout.h b/src/kosmickrisp/vulkan/kk_image_layout.h new file mode 100644 index 00000000000..70b1c2bfe68 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_image_layout.h @@ -0,0 +1,140 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_IMAGE_LAYOUT_H +#define KK_IMAGE_LAYOUT_H 1 + +#include "kosmickrisp/bridge/mtl_types.h" + +#include "util/format/u_formats.h" + +#include "vulkan/vulkan.h" + +#define KK_MAX_MIP_LEVELS 16 + +struct kk_device; +struct VkImageCreateInfo; +enum pipe_swizzle; + +struct kk_image_layout { + /** Width, height, and depth in pixels at level 0 */ + uint32_t width_px, height_px, depth_px, layers; + + enum mtl_texture_type type; + + /** Number of samples per pixel. 1 if multisampling is disabled. */ + uint8_t sample_count_sa; + + /** Number of miplevels. 1 if no mipmapping is used. */ + uint8_t levels; + + uint8_t optimized_layout; + + enum mtl_texture_usage usage; + + /** Texture format */ + struct { + enum pipe_format pipe; + uint32_t mtl; + } format; + + /* Required to correctly set image swizzle for non-native formats */ + /* Would love to use enum pipe_swizzle, but it's bigger than the required + * type for util_format_compose_swizzles... */ + struct { + uint8_t red; + uint8_t green; + uint8_t blue; + uint8_t alpha; + } swizzle; + + /** + * If tiling is LINEAR, the number of bytes between adjacent rows of + * elements. Otherwise, this field is zero. + */ + uint32_t linear_stride_B; + + /** + * Stride between layers of an array texture, including a cube map. Layer i + * begins at offset (i * layer_stride_B) from the beginning of the texture. + * + * If depth_px = 1, the value of this field is UNDEFINED. + */ + uint64_t layer_stride_B; + + /** + * Offsets of mip levels within a layer. + */ + uint64_t level_offsets_B[KK_MAX_MIP_LEVELS]; + + /** + * If tiling is TWIDDLED, the stride in elements used for each mip level + * within a layer. Calculating level strides is the sole responsibility of + * ail_initialized_twiddled. This is necessary because compressed pixel + * formats may add extra stride padding. + */ + uint32_t stride_el[KK_MAX_MIP_LEVELS]; + + /* Size of entire texture */ + uint64_t size_B; + + /* Alignment required */ + uint64_t align_B; +}; + +struct kk_view_layout { + /** Type */ + VkImageViewType view_type; + + /** Number of samples per pixel. 1 if multisampling is disabled. + * Required to be able to correctly set the MTLTextureType. + */ + uint8_t sample_count_sa; + + /** Texture format */ + struct { + enum pipe_format pipe; + uint32_t mtl; + } format; + + /** Array base level. 0 if no array is used. */ + uint16_t base_array_layer; + + /** Array length. 1 if no array is used. */ + uint16_t array_len; + + /** Swizzle */ + /* Would love to use enum pipe_swizzle, but it's bigger than the required + * type for util_format_compose_swizzles... */ + struct { + union { + struct { + uint8_t red; + uint8_t green; + uint8_t blue; + uint8_t alpha; + }; + uint8_t channels[4]; + }; + } swizzle; + + /** Mipmap base level. 0 if no mipmapping is used. */ + uint8_t base_level; + + /** Number of miplevels. 1 if no mipmapping is used. 
*/ + uint8_t num_levels; + + uint16_t min_lod_clamp; +}; + +void kk_image_layout_init(const struct kk_device *dev, + const struct VkImageCreateInfo *create_info, + enum pipe_format format, const uint8_t width_scale, + const uint8_t height_scale, + struct kk_image_layout *layout); + +#endif /* KK_IMAGE_LAYOUT_H */ diff --git a/src/kosmickrisp/vulkan/kk_image_view.c b/src/kosmickrisp/vulkan/kk_image_view.c new file mode 100644 index 00000000000..ed8bd485972 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_image_view.c @@ -0,0 +1,267 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_image_view.h" + +#include "kk_device.h" +#include "kk_entrypoints.h" +#include "kk_format.h" +#include "kk_image.h" +#include "kk_physical_device.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" +#include "kosmickrisp/bridge/mtl_format.h" + +#include "vk_format.h" + +static enum pipe_swizzle +vk_swizzle_to_pipe(VkComponentSwizzle swizzle) +{ + switch (swizzle) { + case VK_COMPONENT_SWIZZLE_R: + return PIPE_SWIZZLE_X; + case VK_COMPONENT_SWIZZLE_G: + return PIPE_SWIZZLE_Y; + case VK_COMPONENT_SWIZZLE_B: + return PIPE_SWIZZLE_Z; + case VK_COMPONENT_SWIZZLE_A: + return PIPE_SWIZZLE_W; + case VK_COMPONENT_SWIZZLE_ONE: + return PIPE_SWIZZLE_1; + case VK_COMPONENT_SWIZZLE_ZERO: + return PIPE_SWIZZLE_0; + default: + UNREACHABLE("Invalid component swizzle"); + } +} + +static enum VkImageViewType +remove_1d_view_types(enum VkImageViewType type) +{ + if (type == VK_IMAGE_VIEW_TYPE_1D) + return VK_IMAGE_VIEW_TYPE_2D; + if (type == VK_IMAGE_VIEW_TYPE_1D_ARRAY) + return VK_IMAGE_VIEW_TYPE_2D_ARRAY; + return type; +} + +VkResult +kk_image_view_init(struct kk_device *dev, struct kk_image_view *view, + const VkImageViewCreateInfo *pCreateInfo) +{ + VK_FROM_HANDLE(kk_image, image, pCreateInfo->image); + + memset(view, 0, sizeof(*view)); + + vk_image_view_init(&dev->vk, &view->vk, pCreateInfo); + + /* First, figure out which image planes we need. + * For depth/stencil, we only have plane so simply assert + * and then map directly betweeen the image and view plane + */ + if (image->vk.aspects & + (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { + assert(image->plane_count == 1); + assert(kk_image_aspects_to_plane(image, view->vk.aspects) == 0); + view->plane_count = 1; + view->planes[0].image_plane = 0; + } else { + /* For other formats, retrieve the plane count from the aspect mask + * and then walk through the aspect mask to map each image plane + * to its corresponding view plane + */ + assert(util_bitcount(view->vk.aspects) == + vk_format_get_plane_count(view->vk.format)); + view->plane_count = 0; + u_foreach_bit(aspect_bit, view->vk.aspects) { + uint8_t image_plane = + kk_image_aspects_to_plane(image, 1u << aspect_bit); + view->planes[view->plane_count++].image_plane = image_plane; + } + } + /* Finally, fill in each view plane separately */ + for (unsigned view_plane = 0; view_plane < view->plane_count; view_plane++) { + const uint8_t image_plane = view->planes[view_plane].image_plane; + struct kk_image_plane *plane = &image->planes[image_plane]; + + const struct vk_format_ycbcr_info *ycbcr_info = + vk_format_get_ycbcr_info(view->vk.format); + assert(ycbcr_info || view_plane == 0); + VkFormat plane_format = + ycbcr_info ? 
ycbcr_info->planes[view_plane].format : view->vk.format; + enum pipe_format p_format = vk_format_to_pipe_format(plane_format); + if (view->vk.aspects == VK_IMAGE_ASPECT_DEPTH_BIT) + p_format = vk_format_to_pipe_format(image->vk.format); + else if (view->vk.aspects == VK_IMAGE_ASPECT_STENCIL_BIT) + p_format = util_format_stencil_only( + vk_format_to_pipe_format(image->vk.format)); + + view->planes[view_plane].format = p_format; + const struct kk_va_format *supported_format = kk_get_va_format(p_format); + assert(supported_format); + + struct kk_view_layout view_layout = { + .view_type = remove_1d_view_types(view->vk.view_type), + .sample_count_sa = plane->layout.sample_count_sa, + .format = {.pipe = p_format, + .mtl = supported_format->mtl_pixel_format}, + .base_level = view->vk.base_mip_level, + .num_levels = view->vk.level_count, + .base_array_layer = view->vk.base_array_layer, + .array_len = view->vk.layer_count, + .min_lod_clamp = view->vk.min_lod, + }; + uint8_t view_swizzle[4] = {vk_swizzle_to_pipe(view->vk.swizzle.r), + vk_swizzle_to_pipe(view->vk.swizzle.g), + vk_swizzle_to_pipe(view->vk.swizzle.b), + vk_swizzle_to_pipe(view->vk.swizzle.a)}; + util_format_compose_swizzles(supported_format->swizzle.channels, + view_swizzle, view_layout.swizzle.channels); + + /* When sampling a depth/stencil texture Metal returns (d, d, d, 1), but + * Vulkan requires (d, 0, 0, 1). This means, we need to convert G and B to + * 0 */ + if (util_format_is_depth_or_stencil(p_format)) { + if (view_layout.swizzle.red == PIPE_SWIZZLE_Y || + view_layout.swizzle.red == PIPE_SWIZZLE_Z) + view_layout.swizzle.red = PIPE_SWIZZLE_0; + if (view_layout.swizzle.green == PIPE_SWIZZLE_Y || + view_layout.swizzle.green == PIPE_SWIZZLE_Z) + view_layout.swizzle.green = PIPE_SWIZZLE_0; + if (view_layout.swizzle.blue == PIPE_SWIZZLE_Y || + view_layout.swizzle.blue == PIPE_SWIZZLE_Z) + view_layout.swizzle.blue = PIPE_SWIZZLE_0; + if (view_layout.swizzle.alpha == PIPE_SWIZZLE_Y || + view_layout.swizzle.alpha == PIPE_SWIZZLE_Z) + view_layout.swizzle.alpha = PIPE_SWIZZLE_0; + } + + mtl_texture *mtl_handle = image->planes[image_plane].mtl_handle; + if (image->vk.image_type == VK_IMAGE_TYPE_3D && + view->vk.view_type != VK_IMAGE_VIEW_TYPE_3D) + mtl_handle = image->planes[image_plane].mtl_handle_array; + + if (view->vk.usage & + (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) { + view->planes[view_plane].mtl_handle_sampled = + mtl_new_texture_view_with(mtl_handle, &view_layout); + view->planes[view_plane].sampled_gpu_resource_id = + mtl_texture_get_gpu_resource_id( + view->planes[view_plane].mtl_handle_sampled); + } + + if (view->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) { + /* For storage images, we can't have any cubes */ + if (view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE || + view->vk.view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) + view_layout.view_type = VK_IMAGE_VIEW_TYPE_2D_ARRAY; + + view->planes[view_plane].mtl_handle_storage = + mtl_new_texture_view_with(mtl_handle, &view_layout); + view->planes[view_plane].storage_gpu_resource_id = + mtl_texture_get_gpu_resource_id( + view->planes[view_plane].mtl_handle_storage); + } + + if (view->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) { + bool requires_type_change = + view_layout.view_type != VK_IMAGE_VIEW_TYPE_3D && + view_layout.view_type != VK_IMAGE_VIEW_TYPE_2D_ARRAY; + bool requires_format_change = view->vk.format != image->vk.format; + VkImageViewType original_type = view_layout.view_type; + + /* Required so 
sampling from input attachments actually return (d, 0, + * 0, 1) for d/s attachments and render targets cannot have swizzle + * according to Metal... + */ + if (requires_type_change || requires_format_change) { + view_layout.view_type = requires_type_change + ? VK_IMAGE_VIEW_TYPE_2D_ARRAY + : original_type; + view->planes[view_plane].mtl_handle_input = + mtl_new_texture_view_with(mtl_handle, &view_layout); + } else + view->planes[view_plane].mtl_handle_input = mtl_retain(mtl_handle); + view->planes[view_plane].input_gpu_resource_id = + mtl_texture_get_gpu_resource_id( + view->planes[view_plane].mtl_handle_input); + + /* Handle mutable formats */ + if (requires_format_change) { + view_layout.view_type = original_type; + view_layout.base_array_layer = 0u; + view_layout.base_level = 0u; + view_layout.array_len = image->vk.array_layers; + view_layout.num_levels = image->vk.mip_levels; + view->planes[view_plane].mtl_handle_render = + mtl_new_texture_view_with_no_swizzle(mtl_handle, &view_layout); + } else + view->planes[view_plane].mtl_handle_render = mtl_retain(mtl_handle); + } + } + + return VK_SUCCESS; +} + +void +kk_image_view_finish(struct kk_device *dev, struct kk_image_view *view) +{ + for (uint8_t plane = 0; plane < view->plane_count; plane++) { + if (view->planes[plane].mtl_handle_sampled) + mtl_release(view->planes[plane].mtl_handle_sampled); + + if (view->planes[plane].mtl_handle_storage) + mtl_release(view->planes[plane].mtl_handle_storage); + + if (view->planes[plane].mtl_handle_input) + mtl_release(view->planes[plane].mtl_handle_input); + + if (view->planes[plane].mtl_handle_render) + mtl_release(view->planes[plane].mtl_handle_render); + } + + vk_image_view_finish(&view->vk); +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkImageView *pView) +{ + VK_FROM_HANDLE(kk_device, dev, _device); + struct kk_image_view *view; + VkResult result; + + view = vk_alloc2(&dev->vk.alloc, pAllocator, sizeof(*view), 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!view) + return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + + result = kk_image_view_init(dev, view, pCreateInfo); + if (result != VK_SUCCESS) { + vk_free2(&dev->vk.alloc, pAllocator, view); + return result; + } + + *pView = kk_image_view_to_handle(view); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +kk_DestroyImageView(VkDevice _device, VkImageView imageView, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(kk_device, dev, _device); + VK_FROM_HANDLE(kk_image_view, view, imageView); + + if (!view) + return; + + kk_image_view_finish(dev, view); + vk_free2(&dev->vk.alloc, pAllocator, view); +} diff --git a/src/kosmickrisp/vulkan/kk_image_view.h b/src/kosmickrisp/vulkan/kk_image_view.h new file mode 100644 index 00000000000..1a18c5d6f66 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_image_view.h @@ -0,0 +1,57 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_IMAGE_VIEW_H +#define KK_IMAGE_VIEW_H 1 + +#include "kk_private.h" + +#include "kosmickrisp/bridge/mtl_types.h" + +#include "util/format/u_formats.h" + +#include "vk_image.h" + +struct kk_device; + +struct kk_image_view { + struct vk_image_view vk; + + uint8_t plane_count; + struct { + uint8_t image_plane; + + enum pipe_format format; + + mtl_texture *mtl_handle_sampled; + mtl_texture + *mtl_handle_storage; // TODO_KOSMICKRISP We can probably get rid of + // this once we lower 2D cubes and 3D to 2D array? + + /* Cached handle so we don't have to retrieve it from the image when we + * render */ + mtl_texture *mtl_handle_render; + + /* Input attachment handle. Required since input attachments needs to be + * arrays, and sampled may not be */ + mtl_texture *mtl_handle_input; + + uint64_t sampled_gpu_resource_id; + uint64_t storage_gpu_resource_id; + uint64_t input_gpu_resource_id; + } planes[3]; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(kk_image_view, vk.base, VkImageView, + VK_OBJECT_TYPE_IMAGE_VIEW) + +VkResult kk_image_view_init(struct kk_device *dev, struct kk_image_view *view, + const VkImageViewCreateInfo *pCreateInfo); + +void kk_image_view_finish(struct kk_device *dev, struct kk_image_view *view); + +#endif /* KK_IMAGE_VIEW_H */ diff --git a/src/kosmickrisp/vulkan/kk_instance.c b/src/kosmickrisp/vulkan/kk_instance.c new file mode 100644 index 00000000000..a51f2fd958b --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_instance.c @@ -0,0 +1,225 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_instance.h" + +#include "kk_debug.h" +#include "kk_entrypoints.h" +#include "kk_physical_device.h" + +#include "kk_dispatch_trampolines.h" + +#include "vulkan/wsi/wsi_common.h" + +#include "util/build_id.h" +#include "util/driconf.h" +#include "util/mesa-sha1.h" +#include "util/u_debug.h" + +VKAPI_ATTR VkResult VKAPI_CALL +kk_EnumerateInstanceVersion(uint32_t *pApiVersion) +{ + uint32_t version_override = vk_get_version_override(); + *pApiVersion = version_override ? 
version_override + : VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION); + + return VK_SUCCESS; +} + +static const struct vk_instance_extension_table instance_extensions = { +#ifdef KK_USE_WSI_PLATFORM + .KHR_get_surface_capabilities2 = true, + .KHR_surface = true, + .KHR_surface_protected_capabilities = true, + .EXT_surface_maintenance1 = true, + .EXT_swapchain_colorspace = true, +#endif +#ifdef VK_USE_PLATFORM_WAYLAND_KHR + .KHR_wayland_surface = true, +#endif +#ifdef VK_USE_PLATFORM_XCB_KHR + .KHR_xcb_surface = true, +#endif +#ifdef VK_USE_PLATFORM_XLIB_KHR + .KHR_xlib_surface = true, +#endif +#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT + .EXT_acquire_xlib_display = true, +#endif +#ifdef VK_USE_PLATFORM_DISPLAY_KHR + .KHR_display = true, + .KHR_get_display_properties2 = true, + .EXT_direct_mode_display = true, + .EXT_display_surface_counter = true, + .EXT_acquire_drm_display = true, +#endif +#ifdef VK_USE_PLATFORM_METAL_EXT + .EXT_metal_surface = true, +#endif +#ifndef VK_USE_PLATFORM_METAL_EXT + .EXT_headless_surface = true, +#endif + .KHR_device_group_creation = true, + .KHR_external_fence_capabilities = true, + .KHR_external_memory_capabilities = true, + .KHR_external_semaphore_capabilities = true, + .KHR_get_physical_device_properties2 = true, + .EXT_debug_report = true, + .EXT_debug_utils = true, +}; + +VKAPI_ATTR VkResult VKAPI_CALL +kk_EnumerateInstanceExtensionProperties(const char *pLayerName, + uint32_t *pPropertyCount, + VkExtensionProperties *pProperties) +{ + if (pLayerName) + return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); + + return vk_enumerate_instance_extension_properties( + &instance_extensions, pPropertyCount, pProperties); +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkInstance *pInstance) +{ + struct kk_instance *instance; + VkResult result; + + if (pAllocator == NULL) + pAllocator = vk_default_allocator(); + + instance = vk_alloc(pAllocator, sizeof(*instance), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!instance) + return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); + + struct vk_instance_dispatch_table dispatch_table; + vk_instance_dispatch_table_from_entrypoints(&dispatch_table, + &kk_instance_entrypoints, true); + vk_instance_dispatch_table_from_entrypoints( + &dispatch_table, &wsi_instance_entrypoints, false); + + result = vk_instance_init(&instance->vk, &instance_extensions, + &dispatch_table, pCreateInfo, pAllocator); + if (result != VK_SUCCESS) + goto fail_alloc; + + instance->vk.physical_devices.enumerate = kk_enumerate_physical_devices; + instance->vk.physical_devices.destroy = kk_physical_device_destroy; + + /* TODO_KOSMICKRISP We need to fill instance->driver_build_sha */ + + kk_process_debug_variable(); + + *pInstance = kk_instance_to_handle(instance); + return VK_SUCCESS; + +// fail_init: +// vk_instance_finish(&instance->vk); +fail_alloc: + vk_free(pAllocator, instance); + + return result; +} + +VKAPI_ATTR void VKAPI_CALL +kk_DestroyInstance(VkInstance _instance, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(kk_instance, instance, _instance); + + if (!instance) + return; + + vk_instance_finish(&instance->vk); + vk_free(&instance->vk.alloc, instance); +} + +/* We need this to return our own trampoline functions */ +static PFN_vkVoidFunction +kk_instance_get_proc_addr(const struct vk_instance *instance, + const struct vk_instance_entrypoint_table *entrypoints, + const char *name) +{ + PFN_vkVoidFunction func; + + /* The Vulkan 1.0 spec for 
vkGetInstanceProcAddr has a table of exactly + * when we have to return valid function pointers, NULL, or it's left + * undefined. See the table for exact details. + */ + if (name == NULL) + return NULL; + +#define LOOKUP_VK_ENTRYPOINT(entrypoint) \ + if (strcmp(name, "vk" #entrypoint) == 0) \ + return (PFN_vkVoidFunction)entrypoints->entrypoint + + LOOKUP_VK_ENTRYPOINT(EnumerateInstanceExtensionProperties); + LOOKUP_VK_ENTRYPOINT(EnumerateInstanceLayerProperties); + LOOKUP_VK_ENTRYPOINT(EnumerateInstanceVersion); + LOOKUP_VK_ENTRYPOINT(CreateInstance); + + /* GetInstanceProcAddr() can also be called with a NULL instance. + * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057 + */ + LOOKUP_VK_ENTRYPOINT(GetInstanceProcAddr); + +#undef LOOKUP_VK_ENTRYPOINT + + /* Beginning with ICD interface v7, the following functions can also be + * retrieved via vk_icdGetInstanceProcAddr. + */ + + if (strcmp(name, "vk_icdNegotiateLoaderICDInterfaceVersion") == 0) + return (PFN_vkVoidFunction)vk_icdNegotiateLoaderICDInterfaceVersion; + if (strcmp(name, "vk_icdGetPhysicalDeviceProcAddr") == 0) + return (PFN_vkVoidFunction)vk_icdGetPhysicalDeviceProcAddr; +#ifdef _WIN32 + if (strcmp(name, "vk_icdEnumerateAdapterPhysicalDevices") == 0) + return (PFN_vkVoidFunction)vk_icdEnumerateAdapterPhysicalDevices; +#endif + + if (instance == NULL) + return NULL; + + func = vk_instance_dispatch_table_get_if_supported( + &instance->dispatch_table, name, instance->app_info.api_version, + &instance->enabled_extensions); + if (func != NULL) + return func; + + func = vk_physical_device_dispatch_table_get_if_supported( + &kk_physical_device_trampolines, name, instance->app_info.api_version, + &instance->enabled_extensions); + if (func != NULL) + return func; + + func = vk_device_dispatch_table_get_if_supported( + &kk_device_trampolines, name, instance->app_info.api_version, + &instance->enabled_extensions, NULL); + if (func != NULL) + return func; + + return NULL; +} + +VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +kk_GetInstanceProcAddr(VkInstance _instance, const char *pName) +{ + VK_FROM_HANDLE(kk_instance, instance, _instance); + return kk_instance_get_proc_addr(&instance->vk, &kk_instance_entrypoints, + pName); +} + +PUBLIC VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName) +{ + return kk_GetInstanceProcAddr(instance, pName); +} diff --git a/src/kosmickrisp/vulkan/kk_instance.h b/src/kosmickrisp/vulkan/kk_instance.h new file mode 100644 index 00000000000..0afbb29a55e --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_instance.h @@ -0,0 +1,26 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_INSTANCE_H +#define KK_INSTANCE_H 1 + +#include "kk_private.h" + +#include "util/xmlconfig.h" +#include "vk_instance.h" + +struct kk_instance { + struct vk_instance vk; + + uint8_t driver_build_sha[20]; + uint32_t force_vk_vendor; +}; + +VK_DEFINE_HANDLE_CASTS(kk_instance, vk.base, VkInstance, + VK_OBJECT_TYPE_INSTANCE) + +#endif // KK_INSTANCE_H diff --git a/src/kosmickrisp/vulkan/kk_nir_lower_descriptors.c b/src/kosmickrisp/vulkan/kk_nir_lower_descriptors.c new file mode 100644 index 00000000000..da076d41815 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_nir_lower_descriptors.c @@ -0,0 +1,765 @@ +/* + * Copyright 2024 Valve Corporation + * Copyright 2024 Alyssa Rosenzweig + * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ +#include "kk_cmd_buffer.h" +#include "kk_descriptor_set.h" +#include "kk_descriptor_set_layout.h" +#include "kk_shader.h" + +#include "kosmickrisp/compiler/nir_to_msl.h" + +#include "nir.h" +#include "nir_builder.h" +#include "nir_builder_opcodes.h" +#include "nir_intrinsics.h" +#include "nir_intrinsics_indices.h" +#include "shader_enums.h" +#include "vk_pipeline.h" + +#include "vulkan/vulkan_core.h" + +struct lower_descriptors_ctx { + const struct kk_descriptor_set_layout *set_layouts[KK_MAX_SETS]; + + bool clamp_desc_array_bounds; + nir_address_format ubo_addr_format; + nir_address_format ssbo_addr_format; +}; + +static const struct kk_descriptor_set_binding_layout * +get_binding_layout(uint32_t set, uint32_t binding, + const struct lower_descriptors_ctx *ctx) +{ + assert(set < KK_MAX_SETS); + assert(ctx->set_layouts[set] != NULL); + + const struct kk_descriptor_set_layout *set_layout = ctx->set_layouts[set]; + + assert(binding < set_layout->binding_count); + return &set_layout->binding[binding]; +} + +static nir_def * +load_speculatable(nir_builder *b, unsigned num_components, unsigned bit_size, + nir_def *addr, unsigned align) +{ + return nir_build_load_global_constant(b, num_components, bit_size, addr, + .align_mul = align, + .access = ACCESS_CAN_SPECULATE); +} + +static nir_def * +load_root(nir_builder *b, unsigned num_components, unsigned bit_size, + nir_def *offset, unsigned align) +{ + nir_def *root = nir_load_buffer_ptr_kk(b, 1, 64, .binding = 0); + + /* We've bound the address of the root descriptor, index in. */ + nir_def *addr = nir_iadd(b, root, nir_u2u64(b, offset)); + + return load_speculatable(b, num_components, bit_size, addr, align); +} + +static bool +lower_load_constant(nir_builder *b, nir_intrinsic_instr *load, + const struct lower_descriptors_ctx *ctx) +{ + assert(load->intrinsic == nir_intrinsic_load_constant); + UNREACHABLE("todo: stick an address in the root descriptor or something"); + + uint32_t base = nir_intrinsic_base(load); + uint32_t range = nir_intrinsic_range(load); + + b->cursor = nir_before_instr(&load->instr); + + nir_def *offset = nir_iadd_imm(b, load->src[0].ssa, base); + nir_def *data = nir_load_ubo( + b, load->def.num_components, load->def.bit_size, nir_imm_int(b, 0), + offset, .align_mul = nir_intrinsic_align_mul(load), + .align_offset = nir_intrinsic_align_offset(load), .range_base = base, + .range = range); + + nir_def_rewrite_uses(&load->def, data); + + return true; +} + +/* helper macro for computing root descriptor byte offsets */ +#define kk_root_descriptor_offset(member) \ + offsetof(struct kk_root_descriptor_table, member) + +static nir_def * +load_descriptor_set_addr(nir_builder *b, uint32_t set, + UNUSED const struct lower_descriptors_ctx *ctx) +{ + uint32_t set_addr_offset = + kk_root_descriptor_offset(sets) + set * sizeof(uint64_t); + + return load_root(b, 1, 64, nir_imm_int(b, set_addr_offset), 8); +} + +static nir_def * +load_dynamic_buffer_start(nir_builder *b, uint32_t set, + const struct lower_descriptors_ctx *ctx) +{ + int dynamic_buffer_start_imm = 0; + for (uint32_t s = 0; s < set; s++) { + if (ctx->set_layouts[s] == NULL) { + dynamic_buffer_start_imm = -1; + break; + } + + dynamic_buffer_start_imm += ctx->set_layouts[s]->dynamic_buffer_count; + } + + if (dynamic_buffer_start_imm >= 0) { + return nir_imm_int(b, dynamic_buffer_start_imm); + } else { + uint32_t root_offset = + kk_root_descriptor_offset(set_dynamic_buffer_start) + set; + + return nir_u2u32(b, 
load_root(b, 1, 8, nir_imm_int(b, root_offset), 1)); + } +} + +static nir_def * +load_descriptor(nir_builder *b, unsigned num_components, unsigned bit_size, + uint32_t set, uint32_t binding, nir_def *index, + unsigned offset_B, const struct lower_descriptors_ctx *ctx) +{ + const struct kk_descriptor_set_binding_layout *binding_layout = + get_binding_layout(set, binding, ctx); + + if (ctx->clamp_desc_array_bounds) + index = + nir_umin(b, index, nir_imm_int(b, binding_layout->array_size - 1)); + + switch (binding_layout->type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { + /* Get the index in the root descriptor table dynamic_buffers array. */ + nir_def *dynamic_buffer_start = load_dynamic_buffer_start(b, set, ctx); + + index = nir_iadd(b, index, + nir_iadd_imm(b, dynamic_buffer_start, + binding_layout->dynamic_buffer_index)); + + nir_def *root_desc_offset = nir_iadd_imm( + b, nir_imul_imm(b, index, sizeof(struct kk_buffer_address)), + kk_root_descriptor_offset(dynamic_buffers)); + + assert(num_components == 4 && bit_size == 32); + nir_def *desc = load_root(b, 4, 32, root_desc_offset, 16); + + /* We know a priori that the the .w compnent (offset) is zero */ + return nir_vector_insert_imm(b, desc, nir_imm_int(b, 0), 3); + } + + case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: { + nir_def *base_addr = nir_iadd_imm( + b, load_descriptor_set_addr(b, set, ctx), binding_layout->offset); + + assert(binding_layout->stride == 1); + const uint32_t binding_size = binding_layout->array_size; + + /* Convert it to nir_address_format_64bit_bounded_global */ + assert(num_components == 4 && bit_size == 32); + return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr), + nir_unpack_64_2x32_split_y(b, base_addr), + nir_imm_int(b, binding_size), nir_imm_int(b, 0)); + } + + default: { + assert(binding_layout->stride > 0); + nir_def *desc_ubo_offset = + nir_iadd_imm(b, nir_imul_imm(b, index, binding_layout->stride), + binding_layout->offset + offset_B); + + unsigned desc_align_mul = (1 << (ffs(binding_layout->stride) - 1)); + desc_align_mul = MIN2(desc_align_mul, 16); + unsigned desc_align_offset = binding_layout->offset + offset_B; + desc_align_offset %= desc_align_mul; + + nir_def *desc; + nir_def *set_addr = load_descriptor_set_addr(b, set, ctx); + desc = nir_load_global_constant_offset( + b, num_components, bit_size, set_addr, desc_ubo_offset, + .align_mul = desc_align_mul, .align_offset = desc_align_offset, + .access = ACCESS_CAN_SPECULATE); + + if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || + binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER) { + /* We know a priori that the the .w compnent (offset) is zero */ + assert(num_components == 4 && bit_size == 32); + desc = nir_vector_insert_imm(b, desc, nir_imm_int(b, 0), 3); + } + return desc; + } + } +} + +static bool +is_idx_intrin(nir_intrinsic_instr *intrin) +{ + while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) { + intrin = nir_src_as_intrinsic(intrin->src[0]); + if (intrin == NULL) + return false; + } + + return intrin->intrinsic == nir_intrinsic_vulkan_resource_index; +} + +static nir_def * +load_descriptor_for_idx_intrin(nir_builder *b, nir_intrinsic_instr *intrin, + const struct lower_descriptors_ctx *ctx) +{ + nir_def *index = nir_imm_int(b, 0); + + while (intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex) { + index = nir_iadd(b, index, intrin->src[1].ssa); + intrin = nir_src_as_intrinsic(intrin->src[0]); + } + + assert(intrin->intrinsic == 
nir_intrinsic_vulkan_resource_index); + uint32_t set = nir_intrinsic_desc_set(intrin); + uint32_t binding = nir_intrinsic_binding(intrin); + index = nir_iadd(b, index, intrin->src[0].ssa); + + return load_descriptor(b, 4, 32, set, binding, index, 0, ctx); +} + +static bool +try_lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin, + const struct lower_descriptors_ctx *ctx) +{ + ASSERTED const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin); + b->cursor = nir_before_instr(&intrin->instr); + + nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(intrin->src[0]); + if (idx_intrin == NULL || !is_idx_intrin(idx_intrin)) { + assert(desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER || + desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC); + return false; + } + + nir_def *desc = load_descriptor_for_idx_intrin(b, idx_intrin, ctx); + + nir_def_rewrite_uses(&intrin->def, desc); + + return true; +} + +static bool +_lower_sysval_to_root_table(nir_builder *b, nir_intrinsic_instr *intrin, + uint32_t root_table_offset) +{ + b->cursor = nir_instr_remove(&intrin->instr); + assert((root_table_offset & 3) == 0 && "aligned"); + + nir_def *val = load_root(b, intrin->def.num_components, intrin->def.bit_size, + nir_imm_int(b, root_table_offset), 4); + + nir_def_rewrite_uses(&intrin->def, val); + + return true; +} + +#define lower_sysval_to_root_table(b, intrin, member) \ + _lower_sysval_to_root_table(b, intrin, kk_root_descriptor_offset(member)) + +static bool +lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *load, + const struct lower_descriptors_ctx *ctx) +{ + const uint32_t push_region_offset = kk_root_descriptor_offset(push); + const uint32_t base = nir_intrinsic_base(load); + + b->cursor = nir_before_instr(&load->instr); + + nir_def *offset = + nir_iadd_imm(b, load->src[0].ssa, push_region_offset + base); + + nir_def *val = load_root(b, load->def.num_components, load->def.bit_size, + offset, load->def.bit_size / 8); + + nir_def_rewrite_uses(&load->def, val); + + return true; +} + +static void +get_resource_deref_binding(nir_builder *b, nir_deref_instr *deref, + uint32_t *set, uint32_t *binding, nir_def **index) +{ + if (deref->deref_type == nir_deref_type_array) { + *index = deref->arr.index.ssa; + deref = nir_deref_instr_parent(deref); + } else { + *index = nir_imm_int(b, 0); + } + + assert(deref->deref_type == nir_deref_type_var); + nir_variable *var = deref->var; + + *set = var->data.descriptor_set; + *binding = var->data.binding; +} + +static nir_def * +load_resource_addr(nir_builder *b, unsigned num_components, unsigned bit_size, + nir_deref_instr *deref, unsigned offset_B, + const struct lower_descriptors_ctx *ctx) +{ + uint32_t set, binding; + nir_def *index; + get_resource_deref_binding(b, deref, &set, &binding, &index); + + const struct kk_descriptor_set_binding_layout *binding_layout = + get_binding_layout(set, binding, ctx); + + if (ctx->clamp_desc_array_bounds) + index = + nir_umin(b, index, nir_imm_int(b, binding_layout->array_size - 1)); + + assert(binding_layout->stride > 0); + nir_def *desc_ubo_offset = + nir_iadd_imm(b, nir_imul_imm(b, index, binding_layout->stride), + binding_layout->offset + offset_B); + + return nir_iadd(b, load_descriptor_set_addr(b, set, ctx), + nir_u2u64(b, desc_ubo_offset)); +} + +static nir_def * +load_resource_deref_desc(nir_builder *b, unsigned num_components, + unsigned bit_size, nir_deref_instr *deref, + unsigned offset_B, + const struct lower_descriptors_ctx *ctx) +{ + uint32_t set, binding; + nir_def *index; + 
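   /* For reference, the descriptor addressing done by load_resource_addr()
    * and the default case of load_descriptor() above reduces to the following
    * arithmetic (field names as in this file; the root table is assumed to
    * hold one base pointer per set, as implied by load_descriptor_set_addr()):
    *
    *    set_addr = root_table->sets[set];
    *    desc     = load(set_addr
    *                    + index * binding_layout->stride
    *                    + binding_layout->offset + offset_B);
    *
    * i.e. each descriptor set is a flat GPU buffer, and a descriptor is found
    * by its binding's byte offset plus a stride-scaled array index.
    */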
get_resource_deref_binding(b, deref, &set, &binding, &index); + return load_descriptor(b, num_components, bit_size, set, binding, index, + offset_B, ctx); +} + +static bool +lower_image_intrin(nir_builder *b, nir_intrinsic_instr *intr, + const struct lower_descriptors_ctx *ctx) +{ + b->cursor = nir_before_instr(&intr->instr); + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + + /* Reads and queries use the texture descriptor; writes and atomics PBE. */ + unsigned offs = + offsetof(struct kk_storage_image_descriptor, image_gpu_resource_id); + + nir_def *resource_addr = load_resource_addr(b, 1, 64, deref, offs, ctx); + + nir_alu_type type; + if (nir_intrinsic_has_atomic_op(intr)) { + type = nir_atomic_op_type(nir_intrinsic_atomic_op(intr)); + type |= intr->src[3].ssa->bit_size; + } else if (nir_intrinsic_has_dest_type(intr)) { + type = nir_intrinsic_dest_type(intr); + } else if (nir_intrinsic_has_src_type(intr)) { + type = nir_intrinsic_src_type(intr); + } else { + type = nir_type_uint32; + } + + nir_variable *var = nir_deref_instr_get_variable(deref); + nir_def *handle = nir_load_texture_handle_kk( + b, 1, 64, resource_addr, .dest_type = type, + .image_dim = nir_intrinsic_image_dim(intr), + .image_array = nir_intrinsic_image_array(intr), + .flags = msl_convert_access_flag(var->data.access)); + + nir_rewrite_image_intrinsic(intr, handle, true); + + return true; +} + +static bool +try_lower_intrin(nir_builder *b, nir_intrinsic_instr *intrin, + const struct lower_descriptors_ctx *ctx) +{ + switch (intrin->intrinsic) { + case nir_intrinsic_load_constant: + return lower_load_constant(b, intrin, ctx); + + case nir_intrinsic_load_vulkan_descriptor: + return try_lower_load_vulkan_descriptor(b, intrin, ctx); + + case nir_intrinsic_load_workgroup_size: + UNREACHABLE("Should have been lowered by nir_lower_cs_intrinsics()"); + + case nir_intrinsic_load_base_workgroup_id: + return lower_sysval_to_root_table(b, intrin, cs.base_group); + + case nir_intrinsic_load_blend_const_color_rgba: + return lower_sysval_to_root_table(b, intrin, draw.blend_constant); + + case nir_intrinsic_load_push_constant: + return lower_load_push_constant(b, intrin, ctx); + + case nir_intrinsic_image_deref_load: + case nir_intrinsic_image_deref_sparse_load: + case nir_intrinsic_image_deref_store: + case nir_intrinsic_image_deref_atomic: + case nir_intrinsic_image_deref_atomic_swap: + case nir_intrinsic_image_deref_size: + case nir_intrinsic_image_deref_samples: + case nir_intrinsic_image_deref_store_block_agx: + return lower_image_intrin(b, intrin, ctx); + + default: + return false; + } +} + +static bool +lower_tex(nir_builder *b, nir_tex_instr *tex, + const struct lower_descriptors_ctx *ctx) +{ + b->cursor = nir_before_instr(&tex->instr); + + nir_def *texture = nir_steal_tex_src(tex, nir_tex_src_texture_deref); + nir_def *sampler = nir_steal_tex_src(tex, nir_tex_src_sampler_deref); + if (!texture) { + assert(!sampler); + return false; + } + + nir_def *plane_ssa = nir_steal_tex_src(tex, nir_tex_src_plane); + const uint32_t plane = + plane_ssa ? nir_src_as_uint(nir_src_for_ssa(plane_ssa)) : 0; + const uint64_t plane_offset_B = + plane * sizeof(struct kk_sampled_image_descriptor); + + /* LOD bias is passed in the descriptor set, rather than embedded into + * the sampler descriptor. There's no spot in the hardware descriptor, + * plus this saves on precious sampler heap spots. 
+ */ + if (tex->op == nir_texop_lod_bias) { + unsigned offs = + offsetof(struct kk_sampled_image_descriptor, lod_bias_fp16); + + nir_def *bias = load_resource_deref_desc( + b, 1, 16, nir_src_as_deref(nir_src_for_ssa(sampler)), + plane_offset_B + offs, ctx); + + nir_def_replace(&tex->def, bias); + return true; + } + + // if (tex->op == nir_texop_image_min_lod_agx) { + // assert(tex->dest_type == nir_type_float16 || + // tex->dest_type == nir_type_uint16); + + // unsigned offs = + // tex->dest_type == nir_type_float16 + // ? offsetof(struct kk_sampled_image_descriptor, min_lod_fp16) + // : offsetof(struct kk_sampled_image_descriptor, min_lod_uint16); + + // nir_def *min = load_resource_deref_desc( + // b, 1, 16, nir_src_as_deref(nir_src_for_ssa(texture)), + // plane_offset_B + offs, ctx); + + // nir_def_replace(&tex->def, min); + // return true; + // } + + // if (tex->op == nir_texop_has_custom_border_color_agx) { + // unsigned offs = offsetof(struct kk_sampled_image_descriptor, + // clamp_0_sampler_index_or_negative); + + // nir_def *res = load_resource_deref_desc( + // b, 1, 16, nir_src_as_deref(nir_src_for_ssa(sampler)), + // plane_offset_B + offs, ctx); + + // nir_def_replace(&tex->def, nir_ige_imm(b, res, 0)); + // return true; + // } + + if (tex->op == nir_texop_custom_border_color_agx) { + unsigned offs = offsetof(struct kk_sampled_image_descriptor, border); + + nir_def *border = load_resource_deref_desc( + b, 4, 32, nir_src_as_deref(nir_src_for_ssa(sampler)), + plane_offset_B + offs, ctx); + + nir_alu_type T = nir_alu_type_get_base_type(tex->dest_type); + border = nir_convert_to_bit_size(b, border, T, tex->def.bit_size); + + nir_def_replace(&tex->def, border); + return true; + } + + { + unsigned offs = + offsetof(struct kk_sampled_image_descriptor, image_gpu_resource_id); + + nir_def *resource_addr = load_resource_addr( + b, 1, 64, nir_src_as_deref(nir_src_for_ssa(texture)), + plane_offset_B + offs, ctx); + + nir_def *handle = NULL; + if (tex->is_shadow) { + handle = nir_load_depth_texture_kk(b, 1, 64, resource_addr, + .image_dim = tex->sampler_dim, + .image_array = tex->is_array); + } else { + handle = nir_load_texture_handle_kk( + b, 1, 64, resource_addr, .dest_type = tex->dest_type, + .image_dim = tex->sampler_dim, .image_array = tex->is_array); + } + nir_tex_instr_add_src(tex, nir_tex_src_texture_handle, handle); + } + + if (sampler != NULL) { + unsigned offs = + offsetof(struct kk_sampled_image_descriptor, sampler_index); + + nir_def *index = load_resource_deref_desc( + b, 1, 16, nir_src_as_deref(nir_src_for_ssa(sampler)), + plane_offset_B + offs, ctx); + + nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle, + nir_load_sampler_handle_kk(b, index)); + } + + if (tex->op == nir_texop_lod) { + nir_def *lod_min = nir_f2f32( + b, load_resource_deref_desc( + b, 1, 16, nir_src_as_deref(nir_src_for_ssa(sampler)), + plane_offset_B + + offsetof(struct kk_sampled_image_descriptor, lod_min_fp16), + ctx)); + nir_def *lod_max = nir_f2f32( + b, load_resource_deref_desc( + b, 1, 16, nir_src_as_deref(nir_src_for_ssa(sampler)), + plane_offset_B + + offsetof(struct kk_sampled_image_descriptor, lod_max_fp16), + ctx)); + + nir_tex_instr_add_src(tex, nir_tex_src_min_lod, lod_min); + nir_tex_instr_add_src(tex, nir_tex_src_max_lod_kk, lod_max); + } + + return true; +} + +static bool +try_lower_descriptors_instr(nir_builder *b, nir_instr *instr, void *_data) +{ + const struct lower_descriptors_ctx *ctx = _data; + + switch (instr->type) { + case nir_instr_type_tex: + return lower_tex(b, 
nir_instr_as_tex(instr), ctx); + case nir_instr_type_intrinsic: + return try_lower_intrin(b, nir_instr_as_intrinsic(instr), ctx); + default: + return false; + } +} + +static bool +lower_ssbo_resource_index(nir_builder *b, nir_intrinsic_instr *intrin, + const struct lower_descriptors_ctx *ctx) +{ + const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin); + if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER && + desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) + return false; + + b->cursor = nir_instr_remove(&intrin->instr); + + uint32_t set = nir_intrinsic_desc_set(intrin); + uint32_t binding = nir_intrinsic_binding(intrin); + nir_def *index = intrin->src[0].ssa; + + const struct kk_descriptor_set_binding_layout *binding_layout = + get_binding_layout(set, binding, ctx); + + nir_def *binding_addr; + uint8_t binding_stride; + switch (binding_layout->type) { + case VK_DESCRIPTOR_TYPE_MUTABLE_EXT: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: { + nir_def *set_addr = load_descriptor_set_addr(b, set, ctx); + binding_addr = nir_iadd_imm(b, set_addr, binding_layout->offset); + binding_stride = binding_layout->stride; + break; + } + + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { + nir_def *root_desc_addr = nir_load_buffer_ptr_kk(b, 1, 64, .binding = 0); + + nir_def *dynamic_buffer_start = + nir_iadd_imm(b, load_dynamic_buffer_start(b, set, ctx), + binding_layout->dynamic_buffer_index); + + nir_def *dynamic_binding_offset = + nir_iadd_imm(b, + nir_imul_imm(b, dynamic_buffer_start, + sizeof(struct kk_buffer_address)), + kk_root_descriptor_offset(dynamic_buffers)); + + binding_addr = + nir_iadd(b, root_desc_addr, nir_u2u64(b, dynamic_binding_offset)); + binding_stride = sizeof(struct kk_buffer_address); + break; + } + + default: + UNREACHABLE("Not an SSBO descriptor"); + } + + /* Tuck the stride in the top 8 bits of the binding address */ + binding_addr = nir_ior_imm(b, binding_addr, (uint64_t)binding_stride << 56); + + const uint32_t binding_size = binding_layout->array_size * binding_stride; + nir_def *offset_in_binding = nir_imul_imm(b, index, binding_stride); + + nir_def *addr = nir_vec4(b, nir_unpack_64_2x32_split_x(b, binding_addr), + nir_unpack_64_2x32_split_y(b, binding_addr), + nir_imm_int(b, binding_size), offset_in_binding); + + nir_def_rewrite_uses(&intrin->def, addr); + + return true; +} + +static bool +lower_ssbo_resource_reindex(nir_builder *b, nir_intrinsic_instr *intrin, + const struct lower_descriptors_ctx *ctx) +{ + const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin); + if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER && + desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) + return false; + + b->cursor = nir_instr_remove(&intrin->instr); + + nir_def *addr = intrin->src[0].ssa; + nir_def *index = intrin->src[1].ssa; + + nir_def *addr_high32 = nir_channel(b, addr, 1); + nir_def *stride = nir_ushr_imm(b, addr_high32, 24); + nir_def *offset = nir_imul(b, index, stride); + + addr = nir_build_addr_iadd(b, addr, ctx->ssbo_addr_format, nir_var_mem_ssbo, + offset); + nir_def_rewrite_uses(&intrin->def, addr); + + return true; +} + +static bool +lower_load_ssbo_descriptor(nir_builder *b, nir_intrinsic_instr *intrin, + const struct lower_descriptors_ctx *ctx) +{ + const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin); + if (desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER && + desc_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) + return false; + + b->cursor = nir_instr_remove(&intrin->instr); + + nir_def *addr = intrin->src[0].ssa; + + 
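   /* For reference, the "address" vec4 consumed here is the one built by
    * lower_ssbo_resource_index() above; restating the shifts and masks used
    * in this file, its layout is:
    *
    *    .x, .y  binding base address packed as 64 bits, with the pointer in
    *            bits 0..55 and the binding stride in bits 56..63
    *    .z      binding size in bytes (array_size * stride)
    *    .w      byte offset of the selected descriptor (index * stride)
    *
    * lower_ssbo_resource_reindex() recovers the stride as (.y >> 24), and the
    * loads below strip it with BITFIELD64_MASK(56) before dereferencing the
    * base pointer.
    */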
nir_def *desc; + switch (ctx->ssbo_addr_format) { + case nir_address_format_64bit_global_32bit_offset: { + nir_def *base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)); + nir_def *offset = nir_channel(b, addr, 3); + /* Mask off the binding stride */ + base = nir_iand_imm(b, base, BITFIELD64_MASK(56)); + desc = nir_load_global_constant_offset(b, 4, 32, base, offset, + .align_mul = 16, .align_offset = 0, + .access = ACCESS_CAN_SPECULATE); + break; + } + + case nir_address_format_64bit_bounded_global: { + nir_def *base = nir_pack_64_2x32(b, nir_trim_vector(b, addr, 2)); + nir_def *size = nir_channel(b, addr, 2); + nir_def *offset = nir_channel(b, addr, 3); + /* Mask off the binding stride */ + base = nir_iand_imm(b, base, BITFIELD64_MASK(56)); + desc = nir_load_global_constant_bounded( + b, 4, 32, base, offset, size, .align_mul = 16, .align_offset = 0, + .access = ACCESS_CAN_SPECULATE); + break; + } + + default: + UNREACHABLE("Unknown address mode"); + } + + nir_def_rewrite_uses(&intrin->def, desc); + + return true; +} + +static bool +lower_ssbo_descriptor(nir_builder *b, nir_intrinsic_instr *intr, void *_data) +{ + const struct lower_descriptors_ctx *ctx = _data; + + switch (intr->intrinsic) { + case nir_intrinsic_vulkan_resource_index: + return lower_ssbo_resource_index(b, intr, ctx); + case nir_intrinsic_vulkan_resource_reindex: + return lower_ssbo_resource_reindex(b, intr, ctx); + case nir_intrinsic_load_vulkan_descriptor: + return lower_load_ssbo_descriptor(b, intr, ctx); + default: + return false; + } +} + +bool +kk_nir_lower_descriptors(nir_shader *nir, + const struct vk_pipeline_robustness_state *rs, + uint32_t set_layout_count, + struct vk_descriptor_set_layout *const *set_layouts) +{ + struct lower_descriptors_ctx ctx = { + .clamp_desc_array_bounds = + rs->storage_buffers != + VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT || + rs->uniform_buffers != + VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT || + rs->images != VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT, + .ssbo_addr_format = kk_buffer_addr_format(rs->storage_buffers), + .ubo_addr_format = kk_buffer_addr_format(rs->uniform_buffers), + }; + + assert(set_layout_count <= KK_MAX_SETS); + for (uint32_t s = 0; s < set_layout_count; s++) { + if (set_layouts[s] != NULL) + ctx.set_layouts[s] = vk_to_kk_descriptor_set_layout(set_layouts[s]); + } + + /* First lower everything but complex SSBOs, then lower complex SSBOs. + * + * TODO: See if we can unify this, not sure if the fast path matters on + * Apple. This is inherited from NVK. + */ + bool pass_lower_descriptors = nir_shader_instructions_pass( + nir, try_lower_descriptors_instr, nir_metadata_control_flow, &ctx); + + bool pass_lower_ssbo = nir_shader_intrinsics_pass( + nir, lower_ssbo_descriptor, nir_metadata_control_flow, &ctx); + + return pass_lower_descriptors || pass_lower_ssbo; +} diff --git a/src/kosmickrisp/vulkan/kk_nir_lower_multiview.c b/src/kosmickrisp/vulkan/kk_nir_lower_multiview.c new file mode 100644 index 00000000000..f90e50339f2 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_nir_lower_multiview.c @@ -0,0 +1,113 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_shader.h" + +#include "nir.h" +#include "nir_builder.h" + +/* View index maps to layer id in Metal */ +static bool +replace_view_index_with_zero(nir_builder *b, nir_intrinsic_instr *instr, + void *data) +{ + if (instr->intrinsic != nir_intrinsic_load_view_index) + return false; + + b->cursor = nir_before_instr(&instr->instr); + nir_def *layer_id = nir_load_layer_id(b); + nir_def_replace(&instr->def, layer_id); + return true; +} + +/* View index maps to layer id in Metal */ +static bool +replace_view_index_with_layer_id(nir_builder *b, nir_intrinsic_instr *instr, + void *data) +{ + if (instr->intrinsic != nir_intrinsic_load_view_index) + return false; + + b->cursor = nir_before_instr(&instr->instr); + nir_def *layer_id = nir_load_layer_id(b); + nir_def_replace(&instr->def, layer_id); + return true; +} + +static bool +replace_view_id_with_value(nir_builder *b, nir_intrinsic_instr *instr, + void *data) +{ + if (instr->intrinsic != nir_intrinsic_load_view_index) + return false; + + b->cursor = nir_before_instr(&instr->instr); + nir_def *view_index = (nir_def *)data; + nir_def_replace(&instr->def, view_index); + return true; +} + +bool +kk_nir_lower_vs_multiview(nir_shader *nir, uint32_t view_mask) +{ + assert(nir->info.stage == MESA_SHADER_VERTEX); + + /* Embed view indices and return */ + uint32_t view_count = util_bitcount(view_mask); + nir_function_impl *entrypoint = nir_shader_get_entrypoint(nir); + nir_builder b = nir_builder_at(nir_before_impl(entrypoint)); + + /* Create array and initialize */ + nir_variable *view_indices = nir_local_variable_create( + entrypoint, glsl_array_type(glsl_uint_type(), view_count, 0), + "view_indices"); + nir_deref_instr *view_indices_deref = nir_build_deref_var(&b, view_indices); + uint32_t count = 0u; + u_foreach_bit(index, view_mask) { + nir_store_deref( + &b, nir_build_deref_array_imm(&b, view_indices_deref, count++), + nir_imm_int(&b, index), 1); + } + + /* Access array based on the amplification id */ + nir_def *amplification_id = nir_load_amplification_id_kk(&b); + nir_def *view_index = nir_load_deref( + &b, nir_build_deref_array(&b, view_indices_deref, amplification_id)); + + bool progress = nir_shader_intrinsics_pass( + nir, replace_view_id_with_value, nir_metadata_control_flow, view_index); + + if (progress) { + BITSET_SET(nir->info.system_values_read, + SYSTEM_VALUE_AMPLIFICATION_ID_KK); + } + + /* With a single view index, Metal's vertex amplification will disregard the + * render target offset. 
We need to apply it ourselves in shader */ + if (view_count == 1u) { + nir_variable *layer_id = nir_create_variable_with_location( + nir, nir_var_shader_out, VARYING_SLOT_LAYER, glsl_uint_type()); + nir_deref_instr *layer_id_deref = nir_build_deref_var(&b, layer_id); + nir_def *view_index = nir_imm_int(&b, util_last_bit(view_mask) - 1u); + nir_store_deref(&b, layer_id_deref, view_index, 0xFFFFFFFF); + + nir->info.outputs_written |= BITFIELD64_BIT(VARYING_SLOT_LAYER); + progress = true; + } + + return progress; +} + +bool +kk_nir_lower_fs_multiview(nir_shader *nir, uint32_t view_mask) +{ + if (view_mask == 0u) + return nir_shader_intrinsics_pass(nir, replace_view_index_with_zero, + nir_metadata_control_flow, NULL); + + return nir_shader_intrinsics_pass(nir, replace_view_index_with_layer_id, + nir_metadata_control_flow, NULL); +} diff --git a/src/kosmickrisp/vulkan/kk_nir_lower_textures.c b/src/kosmickrisp/vulkan/kk_nir_lower_textures.c new file mode 100644 index 00000000000..85339be414c --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_nir_lower_textures.c @@ -0,0 +1,193 @@ +/* + * Copyright 2023 Valve Corporation + * Copyright 2021 Alyssa Rosenzweig + * Copyright 2020 Collabora Ltd. + * Copyright 2016 Broadcom + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ +#include "kk_private.h" + +#include "kk_descriptor_types.h" +#include "kk_shader.h" + +#include "nir.h" +#include "nir_builder.h" + +#include "stdbool.h" + +static bool +lower_texture_buffer_tex_instr(nir_builder *b, nir_tex_instr *tex) +{ + if (tex->sampler_dim != GLSL_SAMPLER_DIM_BUF) + return false; + + nir_steal_tex_src(tex, nir_tex_src_lod); + return true; +} + +static void +lower_1d_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin) +{ + nir_def *coord = intrin->src[1].ssa; + bool is_array = nir_intrinsic_image_array(intrin); + nir_def *zero = nir_imm_intN_t(b, 0, coord->bit_size); + + if (is_array) { + assert(coord->num_components >= 2); + coord = + nir_vec3(b, nir_channel(b, coord, 0), zero, nir_channel(b, coord, 1)); + } else { + assert(coord->num_components >= 1); + coord = nir_vec2(b, coord, zero); + } + + nir_src_rewrite(&intrin->src[1], nir_pad_vector(b, coord, 4)); + nir_intrinsic_set_image_dim(intrin, GLSL_SAMPLER_DIM_2D); +} + +static nir_def * +txs_for_image(nir_builder *b, nir_intrinsic_instr *intr, + unsigned num_components, unsigned bit_size, bool query_samples) +{ + nir_tex_instr *tex = nir_tex_instr_create(b->shader, query_samples ? 1 : 2); + tex->op = query_samples ? nir_texop_texture_samples : nir_texop_txs; + tex->is_array = nir_intrinsic_image_array(intr); + tex->dest_type = nir_type_uint32; + tex->sampler_dim = nir_intrinsic_image_dim(intr); + + tex->src[0] = + nir_tex_src_for_ssa(nir_tex_src_texture_handle, intr->src[0].ssa); + + if (!query_samples) + tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_lod, intr->src[1].ssa); + + nir_def_init(&tex->instr, &tex->def, num_components, bit_size); + nir_builder_instr_insert(b, &tex->instr); + nir_def *res = &tex->def; + + /* Cube images are implemented as 2D arrays, so we need to divide here. */ + if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && res->num_components > 2 && + !query_samples) { + nir_def *divided = nir_udiv_imm(b, nir_channel(b, res, 2), 6); + res = nir_vector_insert_imm(b, res, divided, 2); + } + + return res; +} + +/* Cube textures need to be loaded as cube textures for sampling, but for + * storage we need to load them as 2d array since Metal does not support atomics + * on cube images. 
However, we don't know how the texture will be used when we + * load the handle so we need to do it when we actually use it. */ +static void +lower_cube_load_handle_to_2d_array(nir_def *handle) +{ + nir_instr *handle_parent = handle->parent_instr; + assert(handle_parent->type == nir_instr_type_intrinsic); + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(handle_parent); + assert(intrin->intrinsic == nir_intrinsic_load_texture_handle_kk); + assert(nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE); + nir_intrinsic_set_image_dim(intrin, GLSL_SAMPLER_DIM_2D); + nir_intrinsic_set_image_array(intrin, true); +} + +static void +lower_cube_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin) +{ + assert(nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE); + nir_def *coord = intrin->src[1].ssa; + if (nir_intrinsic_image_array(intrin)) { + assert(coord->num_components >= 4); + nir_def *layer_index = + nir_iadd(b, nir_channel(b, coord, 2), + nir_imul_imm(b, nir_channel(b, coord, 3), 6)); + coord = nir_vec4(b, nir_channel(b, coord, 0), nir_channel(b, coord, 1), + layer_index, nir_imm_intN_t(b, 0, coord->bit_size)); + } + nir_src_rewrite(&intrin->src[1], nir_pad_vector(b, coord, 4)); + nir_intrinsic_set_image_dim(intrin, GLSL_SAMPLER_DIM_2D); + nir_intrinsic_set_image_array(intrin, true); + + lower_cube_load_handle_to_2d_array(intrin->src[0].ssa); +} + +static bool +lower_image_load_store(nir_builder *b, nir_intrinsic_instr *intrin) +{ + b->cursor = nir_before_instr(&intrin->instr); + switch (intrin->intrinsic) { + case nir_intrinsic_load_texture_handle_kk: + switch (nir_intrinsic_image_dim(intrin)) { + case GLSL_SAMPLER_DIM_1D: + nir_intrinsic_set_image_dim(intrin, GLSL_SAMPLER_DIM_2D); + return true; + default: + return false; + } + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic: + case nir_intrinsic_image_atomic_swap: + case nir_intrinsic_bindless_image_load: + case nir_intrinsic_bindless_image_sparse_load: + case nir_intrinsic_bindless_image_store: + case nir_intrinsic_bindless_image_atomic: + case nir_intrinsic_bindless_image_atomic_swap: + switch (nir_intrinsic_image_dim(intrin)) { + case GLSL_SAMPLER_DIM_1D: + lower_1d_image_intrin(b, intrin); + return true; + case GLSL_SAMPLER_DIM_CUBE: + lower_cube_image_intrin(b, intrin); + return true; + default: + return false; + } + case nir_intrinsic_bindless_image_size: + case nir_intrinsic_bindless_image_samples: + nir_def_rewrite_uses( + &intrin->def, + txs_for_image( + b, intrin, intrin->def.num_components, intrin->def.bit_size, + intrin->intrinsic == nir_intrinsic_bindless_image_samples)); + return true; + default: + return false; + } +} + +static bool +lower_image(nir_builder *b, nir_instr *instr) +{ + if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex = nir_instr_as_tex(instr); + return lower_texture_buffer_tex_instr(b, tex); + } else if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + return lower_image_load_store(b, intrin); + } + + return false; +} + +/* Must go after descriptor lowering to ensure the instr we introduce are also + * lowered */ +bool +kk_nir_lower_textures(nir_shader *nir) +{ + bool progress = false; + nir_foreach_function_impl(impl, nir) { + nir_foreach_block_safe(block, impl) { + nir_builder b = nir_builder_create(impl); + bool progress_impl = false; + nir_foreach_instr_safe(instr, block) { + progress_impl |= lower_image(&b, instr); + } + progress |= + nir_progress(progress_impl, 
impl, nir_metadata_control_flow); + } + } + return progress; +} diff --git a/src/kosmickrisp/vulkan/kk_nir_lower_vbo.c b/src/kosmickrisp/vulkan/kk_nir_lower_vbo.c new file mode 100644 index 00000000000..7dc3764a6b2 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_nir_lower_vbo.c @@ -0,0 +1,279 @@ +/* + * Copyright 2022 Alyssa Rosenzweig + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_nir_lower_vbo.h" + +#include "kk_cmd_buffer.h" + +#include "compiler/nir/nir_builder.h" +#include "compiler/nir/nir_format_convert.h" +#include "util/bitset.h" +#include "util/u_math.h" +#include "shader_enums.h" + +struct ctx { + struct kk_attribute *attribs; + bool requires_vertex_id; + bool requires_instance_id; + bool requires_base_instance; +}; + +static bool +is_rgb10_a2(const struct util_format_description *desc) +{ + return desc->channel[0].shift == 0 && desc->channel[0].size == 10 && + desc->channel[1].shift == 10 && desc->channel[1].size == 10 && + desc->channel[2].shift == 20 && desc->channel[2].size == 10 && + desc->channel[3].shift == 30 && desc->channel[3].size == 2; +} + +static bool +is_rg11_b10(const struct util_format_description *desc) +{ + return desc->channel[0].shift == 0 && desc->channel[0].size == 11 && + desc->channel[1].shift == 11 && desc->channel[1].size == 11 && + desc->channel[2].shift == 22 && desc->channel[2].size == 10; +} + +static enum pipe_format +kk_vbo_internal_format(enum pipe_format format) +{ + const struct util_format_description *desc = util_format_description(format); + + /* RGB10A2 and RG11B10 require loading as uint and then unpack */ + if (is_rgb10_a2(desc) || is_rg11_b10(desc)) + return PIPE_FORMAT_R32_UINT; + + /* R11G11B10F is native and special */ + if (format == PIPE_FORMAT_R11G11B10_FLOAT) + return format; + + /* No other non-array formats handled */ + if (!desc->is_array) + return PIPE_FORMAT_NONE; + + /* Otherwise look at one (any) channel */ + int idx = util_format_get_first_non_void_channel(format); + if (idx < 0) + return PIPE_FORMAT_NONE; + + /* We only handle RGB formats (we could do SRGB if we wanted though?) */ + if ((desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB) || + (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)) + return PIPE_FORMAT_NONE; + + /* We have native 8-bit and 16-bit normalized formats */ + struct util_format_channel_description chan = desc->channel[idx]; + + /* Otherwise map to the corresponding integer format */ + switch (chan.size) { + case 32: + return PIPE_FORMAT_R32_UINT; + case 16: + return PIPE_FORMAT_R16_UINT; + case 8: + return PIPE_FORMAT_R8_UINT; + default: + return PIPE_FORMAT_NONE; + } +} + +bool +kk_vbo_supports_format(enum pipe_format format) +{ + return kk_vbo_internal_format(format) != PIPE_FORMAT_NONE; +} + +static nir_def * +apply_swizzle_channel(nir_builder *b, nir_def *vec, unsigned swizzle, + bool is_int) +{ + switch (swizzle) { + case PIPE_SWIZZLE_X: + return nir_channel(b, vec, 0); + case PIPE_SWIZZLE_Y: + return nir_channel(b, vec, 1); + case PIPE_SWIZZLE_Z: + return nir_channel(b, vec, 2); + case PIPE_SWIZZLE_W: + return nir_channel(b, vec, 3); + case PIPE_SWIZZLE_0: + return nir_imm_intN_t(b, 0, vec->bit_size); + case PIPE_SWIZZLE_1: + return is_int ? 
nir_imm_intN_t(b, 1, vec->bit_size) + : nir_imm_floatN_t(b, 1.0, vec->bit_size); + default: + UNREACHABLE("Invalid swizzle channel"); + } +} + +static bool +pass(struct nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + if (intr->intrinsic != nir_intrinsic_load_input) + return false; + + struct ctx *ctx = data; + struct kk_attribute *attribs = ctx->attribs; + b->cursor = nir_instr_remove(&intr->instr); + + nir_src *offset_src = nir_get_io_offset_src(intr); + assert(nir_src_is_const(*offset_src) && "no attribute indirects"); + unsigned index = nir_intrinsic_base(intr) + nir_src_as_uint(*offset_src); + + struct kk_attribute attrib = attribs[index]; + + const struct util_format_description *desc = + util_format_description(attrib.format); + int chan = util_format_get_first_non_void_channel(attrib.format); + assert(chan >= 0); + + bool is_float = desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT; + bool is_unsigned = desc->channel[chan].type == UTIL_FORMAT_TYPE_UNSIGNED; + bool is_signed = desc->channel[chan].type == UTIL_FORMAT_TYPE_SIGNED; + bool is_fixed = desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED; + bool is_int = util_format_is_pure_integer(attrib.format); + + assert((is_float ^ is_unsigned ^ is_signed ^ is_fixed) && "Invalid format"); + + enum pipe_format interchange_format = kk_vbo_internal_format(attrib.format); + assert(interchange_format != PIPE_FORMAT_NONE); + + unsigned interchange_align = util_format_get_blocksize(interchange_format); + unsigned interchange_comps = util_format_get_nr_components(attrib.format); + + /* In the hardware, uint formats zero-extend and float formats convert. + * However, non-uint formats using a uint interchange format shouldn't be + * zero extended. + */ + unsigned interchange_register_size = + util_format_is_pure_uint(interchange_format) ? (interchange_align * 8) + : intr->def.bit_size; + + /* Non-UNORM R10G10B10A2 loaded as a scalar and unpacked */ + if (interchange_format == PIPE_FORMAT_R32_UINT && !desc->is_array) + interchange_comps = 1; + + /* Calculate the element to fetch the vertex for. Divide the instance ID by + * the divisor for per-instance data. Divisor=0 specifies per-vertex data. 
+ */ + nir_def *el; + if (attrib.instanced) { + if (attrib.divisor > 0) { + /* Metal's instance_id has base_instance included */ + nir_def *instance_id = + nir_isub(b, nir_load_instance_id(b), nir_load_base_instance(b)); + el = nir_udiv_imm(b, instance_id, attrib.divisor); + ctx->requires_instance_id = true; + } else + el = nir_imm_int(b, 0); + + el = nir_iadd(b, el, nir_load_base_instance(b)); + ctx->requires_base_instance = true; + + BITSET_SET(b->shader->info.system_values_read, + SYSTEM_VALUE_BASE_INSTANCE); + } else { + el = nir_load_vertex_id(b); + ctx->requires_vertex_id = true; + } + + /* Load the pointer of the buffer from the argument buffer */ + nir_def *argbuf = nir_load_buffer_ptr_kk(b, 1, 64, .binding = 0); + uint64_t attrib_base_offset = + offsetof(struct kk_root_descriptor_table, draw.attrib_base[index]); + nir_def *base = nir_load_global_constant( + b, nir_iadd_imm(b, argbuf, attrib_base_offset), 8, 1, 64); + + uint64_t buffer_stride_offset = offsetof( + struct kk_root_descriptor_table, draw.buffer_strides[attrib.binding]); + nir_def *stride = nir_load_global_constant( + b, nir_iadd_imm(b, argbuf, buffer_stride_offset), 4, 1, 32); + nir_def *stride_offset_el = + nir_imul(b, el, nir_udiv_imm(b, stride, interchange_align)); + + /* Load the raw vector */ + nir_def *memory = nir_load_constant_agx( + b, interchange_comps, interchange_register_size, base, stride_offset_el, + .format = interchange_format, .base = 0u); + + unsigned dest_size = intr->def.bit_size; + unsigned bits[] = {desc->channel[chan].size, desc->channel[chan].size, + desc->channel[chan].size, desc->channel[chan].size}; + + /* Unpack non-native formats */ + if (is_rg11_b10(desc)) { + memory = nir_format_unpack_11f11f10f(b, memory); + } else if (is_rgb10_a2(desc)) { + bits[0] = 10; + bits[1] = 10; + bits[2] = 10; + bits[3] = 2; + if (is_signed) + memory = nir_format_unpack_sint(b, memory, bits, 4); + else + memory = nir_format_unpack_uint(b, memory, bits, 4); + } + + if (desc->channel[chan].normalized) { + if (is_signed) + memory = nir_format_snorm_to_float(b, memory, bits); + else + memory = nir_format_unorm_to_float(b, memory, bits); + } else if (desc->channel[chan].pure_integer) { + if (is_signed) + memory = nir_i2iN(b, memory, dest_size); + else + memory = nir_u2uN(b, memory, dest_size); + } else { + if (is_unsigned) + memory = nir_u2fN(b, memory, dest_size); + else if (is_signed || is_fixed) + memory = nir_i2fN(b, memory, dest_size); + else + memory = nir_f2fN(b, memory, dest_size); + + /* 16.16 fixed-point weirdo GL formats need to be scaled */ + if (is_fixed) { + assert(desc->is_array && desc->channel[chan].size == 32); + assert(dest_size == 32 && "overflow if smaller"); + memory = nir_fmul_imm(b, memory, 1.0 / 65536.0); + } + } + + /* We now have a properly formatted vector of the components in memory. Apply + * the format swizzle forwards to trim/pad/reorder as needed. 
+ */ + nir_def *channels[4] = {NULL}; + + for (unsigned i = 0; i < intr->num_components; ++i) { + unsigned c = nir_intrinsic_component(intr) + i; + channels[i] = apply_swizzle_channel(b, memory, desc->swizzle[c], is_int); + } + + nir_def *logical = nir_vec(b, channels, intr->num_components); + nir_def_rewrite_uses(&intr->def, logical); + return true; +} + +bool +kk_nir_lower_vbo(nir_shader *nir, struct kk_attribute *attribs) +{ + assert(nir->info.stage == MESA_SHADER_VERTEX); + + struct ctx ctx = {.attribs = attribs}; + bool progress = + nir_shader_intrinsics_pass(nir, pass, nir_metadata_control_flow, &ctx); + + if (ctx.requires_instance_id) + BITSET_SET(nir->info.system_values_read, SYSTEM_VALUE_INSTANCE_ID); + if (ctx.requires_base_instance) + BITSET_SET(nir->info.system_values_read, SYSTEM_VALUE_BASE_INSTANCE); + if (ctx.requires_vertex_id) + BITSET_SET(nir->info.system_values_read, SYSTEM_VALUE_VERTEX_ID); + return progress; +} diff --git a/src/kosmickrisp/vulkan/kk_nir_lower_vbo.h b/src/kosmickrisp/vulkan/kk_nir_lower_vbo.h new file mode 100644 index 00000000000..436e070794a --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_nir_lower_vbo.h @@ -0,0 +1,44 @@ +/* + * Copyright 2022 Alyssa Rosenzweig + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include <stdbool.h> +#include <stdint.h> +#include "util/format/u_formats.h" +#include "nir.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define KK_MAX_ATTRIBS (32) +#define KK_MAX_VBUFS (32) + +/* See pipe_vertex_element for justification on the sizes. This structure should + * be small so it can be embedded into a shader key. + */ +struct kk_attribute { + /* If instanced, Zero means all get the same value (Vulkan semantics). */ + uint32_t divisor; + /* Buffer binding to load stride from root_table */ + uint32_t binding; + + /* pipe_format, all vertex formats should be <= 255 */ + uint8_t format; + + unsigned buf : 7; + bool instanced : 1; +}; + +bool kk_nir_lower_vbo(nir_shader *shader, struct kk_attribute *attribs); + +bool kk_vbo_supports_format(enum pipe_format format); + +#ifdef __cplusplus +} /* extern C */ +#endif
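For reference, a minimal scalar sketch of the fetch-index selection that kk_nir_lower_vbo.c above emits in NIR (illustrative only; the helper name is hypothetical, and it assumes Metal-style instance IDs that already include the base instance, as the pass's comments note):

   #include <stdbool.h>
   #include <stdint.h>

   static inline uint32_t
   kk_attrib_fetch_index(bool instanced, uint32_t divisor, uint32_t vertex_id,
                         uint32_t instance_id, uint32_t base_instance)
   {
      if (!instanced)
         return vertex_id;     /* per-vertex attribute */
      if (divisor == 0)
         return base_instance; /* every instance reads the same element */
      /* Undo the implicit base_instance before dividing, then re-apply it. */
      return base_instance + (instance_id - base_instance) / divisor;
   }

The resulting element index is then combined with the buffer stride read from the root descriptor table to address and format-convert the raw attribute data.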
\ No newline at end of file diff --git a/src/kosmickrisp/vulkan/kk_physical_device.c b/src/kosmickrisp/vulkan/kk_physical_device.c new file mode 100644 index 00000000000..2f9ce537f6e --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_physical_device.c @@ -0,0 +1,1032 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_physical_device.h" + +#include "kk_entrypoints.h" +#include "kk_image.h" +#include "kk_instance.h" +#include "kk_nir_lower_vbo.h" +#include "kk_sync.h" +#include "kk_wsi.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" + +#include "util/disk_cache.h" +#include "util/mesa-sha1.h" +#include "git_sha1.h" + +#include "vulkan/wsi/wsi_common.h" +#include "vk_device.h" +#include "vk_drm_syncobj.h" +#include "vk_shader_module.h" + +static uint32_t +kk_get_vk_version() +{ + /* Version override takes priority */ + const uint32_t version_override = vk_get_version_override(); + if (version_override) + return version_override; + + return VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION); +} + +static void +kk_get_device_extensions(const struct kk_instance *instance, + struct vk_device_extension_table *ext) +{ + *ext = (struct vk_device_extension_table){ + /* Vulkan 1.1 */ + .KHR_16bit_storage = true, + .KHR_bind_memory2 = true, + .KHR_dedicated_allocation = true, + .KHR_descriptor_update_template = true, + .KHR_device_group = true, + .KHR_external_fence = true, + .KHR_external_memory = true, + .KHR_external_semaphore = true, + .KHR_get_memory_requirements2 = true, + .KHR_maintenance1 = true, + .KHR_maintenance2 = true, + .KHR_maintenance3 = true, + .KHR_multiview = true, + .KHR_relaxed_block_layout = true, + .KHR_sampler_ycbcr_conversion = true, + .KHR_shader_draw_parameters = false, + .KHR_storage_buffer_storage_class = true, + .KHR_variable_pointers = true, + + /* Vulkan 1.2 */ + .KHR_8bit_storage = true, + .KHR_buffer_device_address = true, /* Required in Vulkan 1.3 */ + .KHR_create_renderpass2 = true, + .KHR_depth_stencil_resolve = true, + .KHR_draw_indirect_count = false, + .KHR_driver_properties = true, + .KHR_image_format_list = true, + .KHR_imageless_framebuffer = true, + .KHR_sampler_mirror_clamp_to_edge = false, + .KHR_separate_depth_stencil_layouts = true, + .KHR_shader_atomic_int64 = false, + .KHR_shader_float16_int8 = + false, /* TODO_KOSMICKRISP shaderInt8 shaderFloat16 */ + .KHR_shader_float_controls = true, + .KHR_shader_subgroup_extended_types = true, + .KHR_spirv_1_4 = true, + .KHR_timeline_semaphore = true, + .KHR_uniform_buffer_standard_layout = true, + .KHR_vulkan_memory_model = true, /* Required in Vulkan 1.3 */ + .EXT_descriptor_indexing = true, + .EXT_host_query_reset = true, + .EXT_sampler_filter_minmax = false, + .EXT_scalar_block_layout = true, + .EXT_separate_stencil_usage = true, + .EXT_shader_viewport_index_layer = false, + + /* Vulkan 1.3 */ + .KHR_copy_commands2 = true, + .KHR_dynamic_rendering = true, + .KHR_format_feature_flags2 = true, + .KHR_maintenance4 = true, + .KHR_shader_integer_dot_product = true, + .KHR_shader_non_semantic_info = true, + .KHR_shader_terminate_invocation = true, + .KHR_synchronization2 = true, + .KHR_zero_initialize_workgroup_memory = true, + .EXT_4444_formats = false, + .EXT_extended_dynamic_state = true, + .EXT_extended_dynamic_state2 = false, + .EXT_image_robustness = true, + .EXT_inline_uniform_block = true, + .EXT_pipeline_creation_cache_control = true, + .EXT_pipeline_creation_feedback = true, + .EXT_private_data = 
true, + .EXT_shader_demote_to_helper_invocation = true, + .EXT_subgroup_size_control = true, + .EXT_texel_buffer_alignment = false, + .EXT_texture_compression_astc_hdr = false, + .EXT_tooling_info = true, + .EXT_ycbcr_2plane_444_formats = false, + + /* Vulkan 1.4 */ + .KHR_push_descriptor = true, + + /* Optional extensions */ +#ifdef KK_USE_WSI_PLATFORM + .KHR_swapchain = true, + .KHR_swapchain_mutable_format = true, +#endif + .EXT_external_memory_metal = true, + .EXT_mutable_descriptor_type = true, + .EXT_shader_replicated_composites = true, + + .KHR_shader_expect_assume = true, + .KHR_shader_maximal_reconvergence = true, + .KHR_shader_relaxed_extended_instruction = true, + .KHR_shader_subgroup_uniform_control_flow = true, + + .GOOGLE_decorate_string = true, + .GOOGLE_hlsl_functionality1 = true, + .GOOGLE_user_type = true, + }; +} + +static void +kk_get_device_features( + const struct vk_device_extension_table *supported_extensions, + struct vk_features *features) +{ + *features = (struct vk_features){ + /* Vulkan 1.0 */ + .robustBufferAccess = true, + .depthClamp = true, + .drawIndirectFirstInstance = true, + /* TODO_KOSMICKRISP + * Enabling fragmentStoresAndAtomics fails the following CTS tests, need + * to investigate: + * dEQP-VK.fragment_operations.early_fragment.discard_no_early_fragment_tests_depth + * dEQP-VK.robustness.image_robustness.bind.notemplate.*i.unroll.nonvolatile.sampled_image.no_fmt_qual.img.samples_1.*d_array.frag + */ + .fragmentStoresAndAtomics = false, + .imageCubeArray = true, + .shaderInt16 = true, + .shaderInt64 = true, + .shaderResourceMinLod = true, + /* TODO_KOSMICKRISP + * Disabled because the following test + * dEQP-VK.api.format_feature_flags2.r8_unorm and similars fail, need to + * set VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT and + * VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT for those formats. 
+ * This may trigger more tests that haven't been run yet */ + .shaderStorageImageReadWithoutFormat = false, + .shaderStorageImageWriteWithoutFormat = false, + .shaderUniformBufferArrayDynamicIndexing = true, + .shaderSampledImageArrayDynamicIndexing = true, + .shaderStorageBufferArrayDynamicIndexing = true, + .shaderStorageImageArrayDynamicIndexing = true, + + /* Vulkan 1.1 */ + .multiview = true, + .storageBuffer16BitAccess = true, + .storageInputOutput16 = false, + .storagePushConstant16 = true, + .variablePointersStorageBuffer = true, + .variablePointers = true, + .uniformAndStorageBuffer16BitAccess = true, + + /* Vulkan 1.2 */ + .descriptorBindingInlineUniformBlockUpdateAfterBind = true, + .descriptorBindingPartiallyBound = true, + .descriptorBindingSampledImageUpdateAfterBind = true, + .descriptorBindingStorageBufferUpdateAfterBind = true, + .descriptorBindingStorageImageUpdateAfterBind = true, + .descriptorBindingStorageTexelBufferUpdateAfterBind = true, + .descriptorBindingUniformBufferUpdateAfterBind = true, + .descriptorBindingUniformTexelBufferUpdateAfterBind = true, + .descriptorBindingUpdateUnusedWhilePending = true, + .descriptorBindingVariableDescriptorCount = true, + .descriptorIndexing = true, + .hostQueryReset = true, + .imagelessFramebuffer = true, + .runtimeDescriptorArray = true, + .scalarBlockLayout = true, + .separateDepthStencilLayouts = true, + /* TODO_KOSMICKRISP shaderFloat16 + * Failing: + * dEQP-VK.spirv_assembly.instruction.compute.float16.opcompositeinsert.v4f16 + * dEQP-VK.spirv_assembly.instruction.compute.float16.opcompositeinsert.v2f16arr5 + * dEQP-VK.spirv_assembly.instruction.compute.float16.opcompositeinsert.v3f16arr5 + * dEQP-VK.spirv_assembly.instruction.compute.float16.opcompositeinsert.v4f16arr3 + * dEQP-VK.spirv_assembly.instruction.compute.float16.opcompositeinsert.struct16arr3 + * dEQP-VK.spirv_assembly.instruction.graphics.float16.opcompositeinsert.v3f16_frag + * dEQP-VK.spirv_assembly.instruction.graphics.float16.opcompositeinsert.v4f16_frag + * dEQP-VK.spirv_assembly.instruction.graphics.float16.opcompositeinsert.v2f16arr5_frag + * dEQP-VK.spirv_assembly.instruction.graphics.float16.opcompositeinsert.v3f16arr5_frag + * dEQP-VK.spirv_assembly.instruction.graphics.float16.opcompositeinsert.v4f16arr3_frag + * dEQP-VK.spirv_assembly.instruction.graphics.float16.opcompositeinsert.struct16arr3_frag + * dEQP-VK.memory_model.shared.16bit.nested_structs_arrays.0 + * dEQP-VK.memory_model.shared.16bit.nested_structs_arrays.4 + */ + .shaderFloat16 = false, + .shaderInputAttachmentArrayDynamicIndexing = true, + .shaderInputAttachmentArrayNonUniformIndexing = true, + /* TODO_KOSMICKRISP shaderInt8 + * Multiple MSL compiler crashes if we enable shaderInt8, need to + * understand why and a workaround: + * dEQP-VK.memory_model.shared.8bit.vector_types.9 + * dEQP-VK.memory_model.shared.8bit.basic_types.8 + * dEQP-VK.memory_model.shared.8bit.basic_arrays.2 + * dEQP-VK.memory_model.shared.8bit.arrays_of_arrays.1 + * dEQP-VK.memory_model.shared.8bit.arrays_of_arrays.8 + * Probably more + */ + .shaderInt8 = false, + .shaderOutputViewportIndex = true, + .shaderOutputLayer = true, + .shaderSampledImageArrayNonUniformIndexing = true, + .shaderStorageBufferArrayNonUniformIndexing = true, + .shaderStorageTexelBufferArrayDynamicIndexing = true, + .shaderSubgroupExtendedTypes = true, + .shaderUniformTexelBufferArrayDynamicIndexing = true, + .shaderUniformTexelBufferArrayNonUniformIndexing = true, + .storageBuffer8BitAccess = true, + .storagePushConstant8 = true, + 
.subgroupBroadcastDynamicId = true, + .timelineSemaphore = true, + .uniformAndStorageBuffer8BitAccess = true, + .uniformBufferStandardLayout = true, + + /* Vulkan 1.3 */ + .bufferDeviceAddress = true, + .computeFullSubgroups = true, + .dynamicRendering = true, + .inlineUniformBlock = true, + .maintenance4 = true, + .pipelineCreationCacheControl = true, + .privateData = true, + .robustImageAccess = true, + .shaderDemoteToHelperInvocation = true, + .shaderIntegerDotProduct = true, + .shaderTerminateInvocation = true, + .shaderZeroInitializeWorkgroupMemory = true, + .subgroupSizeControl = true, + .synchronization2 = true, + .vulkanMemoryModel = true, + .vulkanMemoryModelDeviceScope = true, + + /* Optional features */ + .samplerAnisotropy = true, + .samplerYcbcrConversion = true, + .textureCompressionETC2 = true, + .textureCompressionASTC_LDR = true, + + /* VK_EXT_mutable_descriptor_type */ + .mutableDescriptorType = true, + + /* VK_KHR_shader_expect_assume */ + .shaderExpectAssume = true, + + /* VK_KHR_shader_maximal_reconvergence */ + .shaderMaximalReconvergence = true, + + /* VK_KHR_shader_relaxed_extended_instruction */ + .shaderRelaxedExtendedInstruction = true, + + /* VK_EXT_shader_replicated_composites */ + .shaderReplicatedComposites = true, + + /* VK_KHR_shader_subgroup_uniform_control_flow */ + .shaderSubgroupUniformControlFlow = true, + }; +} + +static void +kk_get_device_properties(const struct kk_physical_device *pdev, + const struct kk_instance *instance, + struct vk_properties *properties) +{ + const VkSampleCountFlagBits sample_counts = + VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | + // TODO_KOSMICKRISP Modify sample count based on what pdev supports + VK_SAMPLE_COUNT_4_BIT /* | + VK_SAMPLE_COUNT_8_BIT */ + ; + + assert(sample_counts <= (KK_MAX_SAMPLES << 1) - 1); + + uint64_t os_page_size = 4096; + os_get_page_size(&os_page_size); + + *properties = (struct vk_properties){ + .apiVersion = kk_get_vk_version(), + .driverVersion = vk_get_driver_version(), + .vendorID = instance->force_vk_vendor != 0 ? 
instance->force_vk_vendor + : 0x106b, + .deviceID = 100, + .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU, + + /* Vulkan 1.0 limits */ + /* Values taken from Apple7 + https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf */ + .maxImageDimension1D = kk_image_max_dimension(VK_IMAGE_TYPE_2D), + .maxImageDimension2D = kk_image_max_dimension(VK_IMAGE_TYPE_2D), + .maxImageDimension3D = kk_image_max_dimension(VK_IMAGE_TYPE_3D), + .maxImageDimensionCube = 16384, + .maxImageArrayLayers = 2048, + .maxTexelBufferElements = 256 * 1024 * 1024, + .maxUniformBufferRange = 65536, + .maxStorageBufferRange = UINT32_MAX, + .maxPushConstantsSize = KK_MAX_PUSH_SIZE, + .maxMemoryAllocationCount = 4096, + .maxSamplerAllocationCount = 4000, + .bufferImageGranularity = 16, + .sparseAddressSpaceSize = KK_SPARSE_ADDR_SPACE_SIZE, + .maxBoundDescriptorSets = KK_MAX_SETS, + .maxPerStageDescriptorSamplers = KK_MAX_DESCRIPTORS, + .maxPerStageDescriptorUniformBuffers = KK_MAX_DESCRIPTORS, + .maxPerStageDescriptorStorageBuffers = KK_MAX_DESCRIPTORS, + .maxPerStageDescriptorSampledImages = KK_MAX_DESCRIPTORS, + .maxPerStageDescriptorStorageImages = KK_MAX_DESCRIPTORS, + .maxPerStageDescriptorInputAttachments = KK_MAX_DESCRIPTORS, + .maxPerStageResources = UINT32_MAX, + .maxDescriptorSetSamplers = KK_MAX_DESCRIPTORS, + .maxDescriptorSetUniformBuffers = KK_MAX_DESCRIPTORS, + .maxDescriptorSetUniformBuffersDynamic = KK_MAX_DYNAMIC_BUFFERS / 2, + .maxDescriptorSetStorageBuffers = KK_MAX_DESCRIPTORS, + .maxDescriptorSetStorageBuffersDynamic = KK_MAX_DYNAMIC_BUFFERS / 2, + .maxDescriptorSetSampledImages = KK_MAX_DESCRIPTORS, + .maxDescriptorSetStorageImages = KK_MAX_DESCRIPTORS, + .maxDescriptorSetInputAttachments = KK_MAX_DESCRIPTORS, + .maxVertexInputAttributes = KK_MAX_ATTRIBS, + .maxVertexInputBindings = KK_MAX_VBUFS, + .maxVertexInputAttributeOffset = 2047, + .maxVertexInputBindingStride = 2048, + .maxVertexOutputComponents = 128, + .maxTessellationGenerationLevel = 64, + .maxTessellationPatchSize = 32, + .maxTessellationControlPerVertexInputComponents = 128, + .maxTessellationControlPerVertexOutputComponents = 128, + .maxTessellationControlPerPatchOutputComponents = 120, + .maxTessellationControlTotalOutputComponents = 4216, + .maxTessellationEvaluationInputComponents = 128, + .maxTessellationEvaluationOutputComponents = 128, + .maxGeometryShaderInvocations = 32, + .maxGeometryInputComponents = 128, + .maxGeometryOutputComponents = 128, + .maxGeometryOutputVertices = 1024, + .maxGeometryTotalOutputComponents = 1024, + .maxFragmentInputComponents = 128, + .maxFragmentOutputAttachments = KK_MAX_RTS, + .maxFragmentDualSrcAttachments = 1, + .maxFragmentCombinedOutputResources = 16, + .maxComputeSharedMemorySize = KK_MAX_SHARED_SIZE, + .maxComputeWorkGroupCount = {0x7fffffff, 65535, 65535}, + .maxComputeWorkGroupInvocations = pdev->info.max_workgroup_invocations, + .maxComputeWorkGroupSize = {pdev->info.max_workgroup_count[0], + pdev->info.max_workgroup_count[1], + pdev->info.max_workgroup_count[2]}, + .subPixelPrecisionBits = 8, + .subTexelPrecisionBits = 8, + .mipmapPrecisionBits = 8, + .maxDrawIndexedIndexValue = UINT32_MAX, + .maxDrawIndirectCount = UINT32_MAX, + .maxSamplerLodBias = 15, + .maxSamplerAnisotropy = 16, + .maxViewports = KK_MAX_VIEWPORTS, + .maxViewportDimensions = {32768, 32768}, + .viewportBoundsRange = {-65536, 65536}, + .viewportSubPixelBits = 8, + .minMemoryMapAlignment = os_page_size, + .minTexelBufferOffsetAlignment = KK_MIN_TEXEL_BUFFER_ALIGNMENT, + .minUniformBufferOffsetAlignment = 
KK_MIN_UBO_ALIGNMENT, + .minStorageBufferOffsetAlignment = KK_MIN_SSBO_ALIGNMENT, + .minTexelOffset = -8, + .maxTexelOffset = 7, + .minTexelGatherOffset = -32, + .maxTexelGatherOffset = 31, + .minInterpolationOffset = -0.5, + .maxInterpolationOffset = 0.4375, + .subPixelInterpolationOffsetBits = 4, + .maxFramebufferHeight = 16384, + .maxFramebufferWidth = 16384, + .maxFramebufferLayers = 2048, + .framebufferColorSampleCounts = sample_counts, + .framebufferDepthSampleCounts = sample_counts, + .framebufferNoAttachmentsSampleCounts = sample_counts, + .framebufferStencilSampleCounts = sample_counts, + .maxColorAttachments = KK_MAX_RTS, + .sampledImageColorSampleCounts = sample_counts, + .sampledImageIntegerSampleCounts = sample_counts, + .sampledImageDepthSampleCounts = sample_counts, + .sampledImageStencilSampleCounts = sample_counts, + .storageImageSampleCounts = sample_counts, + .maxSampleMaskWords = 1, + .timestampComputeAndGraphics = false, + .timestampPeriod = 1, + .maxClipDistances = 8, + .maxCullDistances = 8, + .maxCombinedClipAndCullDistances = 8, + .discreteQueuePriorities = 2, + .pointSizeRange = {1.0f, 1.0f}, + .lineWidthRange = {1.0f, 1.0f}, + .pointSizeGranularity = 0.0f, + .lineWidthGranularity = 0.0f, + .strictLines = false, + .standardSampleLocations = true, + .optimalBufferCopyOffsetAlignment = 1, + .optimalBufferCopyRowPitchAlignment = 1, + .nonCoherentAtomSize = 64, + + /* Vulkan 1.0 sparse properties */ + .sparseResidencyNonResidentStrict = false, + .sparseResidencyAlignedMipSize = false, + .sparseResidencyStandard2DBlockShape = false, + .sparseResidencyStandard2DMultisampleBlockShape = false, + .sparseResidencyStandard3DBlockShape = false, + + /* Vulkan 1.1 properties */ + .subgroupSize = 32, + .subgroupSupportedStages = + VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, + .subgroupSupportedOperations = + VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT | + VK_SUBGROUP_FEATURE_VOTE_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT | + VK_SUBGROUP_FEATURE_SHUFFLE_BIT | + VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT | + VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR, // | TODO_KOSMICKRISP + // VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | + // VK_SUBGROUP_FEATURE_CLUSTERED_BIT | + // VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR, + .subgroupQuadOperationsInAllStages = true, + .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY, + .maxMultiviewViewCount = KK_MAX_MULTIVIEW_VIEW_COUNT, + .maxMultiviewInstanceIndex = UINT32_MAX, + .maxPerSetDescriptors = UINT32_MAX, + .maxMemoryAllocationSize = (1u << 31), + + /* Vulkan 1.2 properties */ + .supportedDepthResolveModes = + VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT | + VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT, + .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | + VK_RESOLVE_MODE_MIN_BIT | + VK_RESOLVE_MODE_MAX_BIT, + .independentResolveNone = true, + .independentResolve = true, + .driverID = VK_DRIVER_ID_MESA_HONEYKRISP, // TODO_KOSMICKRISP Have our own + .conformanceVersion = (VkConformanceVersion){1, 4, 3, 2}, + .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE, + .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE, + .shaderSignedZeroInfNanPreserveFloat16 = false, + .shaderSignedZeroInfNanPreserveFloat32 = false, + .shaderSignedZeroInfNanPreserveFloat64 = false, + .shaderDenormPreserveFloat16 = false, + .shaderDenormPreserveFloat32 = false, + .shaderDenormPreserveFloat64 = false, + .shaderDenormFlushToZeroFloat16 = false, + 
.shaderDenormFlushToZeroFloat32 = false, + .shaderDenormFlushToZeroFloat64 = false, + .shaderRoundingModeRTEFloat16 = false, + .shaderRoundingModeRTEFloat32 = false, + .shaderRoundingModeRTEFloat64 = false, + .shaderRoundingModeRTZFloat16 = false, + .shaderRoundingModeRTZFloat32 = false, + .shaderRoundingModeRTZFloat64 = false, + .maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX, + .shaderUniformBufferArrayNonUniformIndexingNative = true, + .shaderSampledImageArrayNonUniformIndexingNative = true, + .shaderStorageBufferArrayNonUniformIndexingNative = true, + .shaderStorageImageArrayNonUniformIndexingNative = true, + .shaderInputAttachmentArrayNonUniformIndexingNative = true, + .robustBufferAccessUpdateAfterBind = true, + .quadDivergentImplicitLod = false, + .maxPerStageDescriptorUpdateAfterBindSamplers = KK_MAX_DESCRIPTORS, + .maxPerStageDescriptorUpdateAfterBindUniformBuffers = KK_MAX_DESCRIPTORS, + .maxPerStageDescriptorUpdateAfterBindStorageBuffers = KK_MAX_DESCRIPTORS, + .maxPerStageDescriptorUpdateAfterBindSampledImages = KK_MAX_DESCRIPTORS, + .maxPerStageDescriptorUpdateAfterBindStorageImages = KK_MAX_DESCRIPTORS, + .maxPerStageDescriptorUpdateAfterBindInputAttachments = + KK_MAX_DESCRIPTORS, + .maxPerStageUpdateAfterBindResources = UINT32_MAX, + .maxDescriptorSetUpdateAfterBindSamplers = KK_MAX_DESCRIPTORS, + .maxDescriptorSetUpdateAfterBindUniformBuffers = KK_MAX_DESCRIPTORS, + .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = + KK_MAX_DYNAMIC_BUFFERS / 2, + .maxDescriptorSetUpdateAfterBindStorageBuffers = KK_MAX_DESCRIPTORS, + .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = + KK_MAX_DYNAMIC_BUFFERS / 2, + .maxDescriptorSetUpdateAfterBindSampledImages = KK_MAX_DESCRIPTORS, + .maxDescriptorSetUpdateAfterBindStorageImages = KK_MAX_DESCRIPTORS, + .maxDescriptorSetUpdateAfterBindInputAttachments = KK_MAX_DESCRIPTORS, + .filterMinmaxSingleComponentFormats = false, + .filterMinmaxImageComponentMapping = false, + .maxTimelineSemaphoreValueDifference = UINT64_MAX, + .framebufferIntegerColorSampleCounts = sample_counts, + + /* Vulkan 1.3 properties */ + .minSubgroupSize = 32, + .maxSubgroupSize = 32, + .maxComputeWorkgroupSubgroups = pdev->info.max_workgroup_invocations / 32, + .requiredSubgroupSizeStages = 0, + .maxInlineUniformBlockSize = 1 << 16, + .maxPerStageDescriptorInlineUniformBlocks = 32, + .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 32, + .maxDescriptorSetInlineUniformBlocks = 6 * 32, + .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 6 * 32, + .maxInlineUniformTotalSize = 1 << 16, + .integerDotProduct4x8BitPackedUnsignedAccelerated = false, + .integerDotProduct4x8BitPackedSignedAccelerated = false, + .integerDotProduct4x8BitPackedMixedSignednessAccelerated = false, + .storageTexelBufferOffsetAlignmentBytes = KK_MIN_TEXEL_BUFFER_ALIGNMENT, + .storageTexelBufferOffsetSingleTexelAlignment = false, + .uniformTexelBufferOffsetAlignmentBytes = KK_MIN_TEXEL_BUFFER_ALIGNMENT, + .uniformTexelBufferOffsetSingleTexelAlignment = false, + .maxBufferSize = KK_MAX_BUFFER_SIZE, + + /* VK_KHR_push_descriptor */ + .maxPushDescriptors = KK_MAX_PUSH_DESCRIPTORS, + + /* VK_EXT_custom_border_color */ + .maxCustomBorderColorSamplers = 4000, + + /* VK_EXT_extended_dynamic_state3 */ + .dynamicPrimitiveTopologyUnrestricted = false, + + /* VK_EXT_graphics_pipeline_library */ + .graphicsPipelineLibraryFastLinking = true, + .graphicsPipelineLibraryIndependentInterpolationDecoration = true, + + /* VK_KHR_line_rasterization */ + .lineSubPixelPrecisionBits = 8, + + /* 
VK_KHR_maintenance5 */ + .earlyFragmentMultisampleCoverageAfterSampleCounting = false, + .earlyFragmentSampleMaskTestBeforeSampleCounting = true, + .depthStencilSwizzleOneSupport = false, + .polygonModePointSize = false, + .nonStrictSinglePixelWideLinesUseParallelogram = false, + .nonStrictWideLinesUseParallelogram = false, + + /* VK_KHR_maintenance6 */ + .blockTexelViewCompatibleMultipleLayers = false, + .maxCombinedImageSamplerDescriptorCount = 3, + .fragmentShadingRateClampCombinerInputs = false, /* TODO */ + + /* VK_KHR_maintenance7 */ + .robustFragmentShadingRateAttachmentAccess = false, + .separateDepthStencilAttachmentAccess = false, + .maxDescriptorSetTotalUniformBuffersDynamic = KK_MAX_DYNAMIC_BUFFERS / 2, + .maxDescriptorSetTotalStorageBuffersDynamic = KK_MAX_DYNAMIC_BUFFERS / 2, + .maxDescriptorSetTotalBuffersDynamic = KK_MAX_DYNAMIC_BUFFERS, + .maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic = + KK_MAX_DYNAMIC_BUFFERS / 2, + .maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic = + KK_MAX_DYNAMIC_BUFFERS / 2, + .maxDescriptorSetUpdateAfterBindTotalBuffersDynamic = + KK_MAX_DYNAMIC_BUFFERS, + + /* VK_EXT_legacy_vertex_attributes */ + .nativeUnalignedPerformance = true, + + /* VK_EXT_map_memory_placed */ + .minPlacedMemoryMapAlignment = os_page_size, + + /* VK_EXT_multi_draw */ + .maxMultiDrawCount = UINT32_MAX, + + /* VK_EXT_nested_command_buffer */ + .maxCommandBufferNestingLevel = UINT32_MAX, + + /* VK_EXT_pipeline_robustness */ + .defaultRobustnessStorageBuffers = + VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT, + .defaultRobustnessUniformBuffers = + VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT, + .defaultRobustnessVertexInputs = + VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT, + .defaultRobustnessImages = + VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT, + + /* VK_EXT_physical_device_drm gets populated later */ + + /* VK_EXT_provoking_vertex */ + .provokingVertexModePerPipeline = true, + .transformFeedbackPreservesTriangleFanProvokingVertex = true, + + /* VK_EXT_robustness2 */ + .robustStorageBufferAccessSizeAlignment = KK_SSBO_BOUNDS_CHECK_ALIGNMENT, + .robustUniformBufferAccessSizeAlignment = KK_MIN_UBO_ALIGNMENT, + + /* VK_EXT_sample_locations */ + .sampleLocationSampleCounts = sample_counts, + .maxSampleLocationGridSize = (VkExtent2D){1, 1}, + .sampleLocationCoordinateRange[0] = 0.0f, + .sampleLocationCoordinateRange[1] = 0.9375f, + .sampleLocationSubPixelBits = 4, + .variableSampleLocations = false, + + /* VK_EXT_shader_object */ + .shaderBinaryVersion = 0, + + /* VK_EXT_transform_feedback */ + .maxTransformFeedbackStreams = 4, + .maxTransformFeedbackBuffers = 4, + .maxTransformFeedbackBufferSize = UINT32_MAX, + .maxTransformFeedbackStreamDataSize = 2048, + .maxTransformFeedbackBufferDataSize = 512, + .maxTransformFeedbackBufferDataStride = 2048, + .transformFeedbackQueries = true, + .transformFeedbackStreamsLinesTriangles = false, + .transformFeedbackRasterizationStreamSelect = true, + .transformFeedbackDraw = true, + + /* VK_KHR_vertex_attribute_divisor */ + .maxVertexAttribDivisor = UINT32_MAX, + .supportsNonZeroFirstInstance = true, + + /* VK_KHR_fragment_shader_barycentric */ + .triStripVertexOrderIndependentOfProvokingVertex = false, + }; + + char gpu_name[256u]; + mtl_device_get_name(pdev->mtl_dev_handle, gpu_name); + snprintf(properties->deviceName, sizeof(properties->deviceName), "%s", + gpu_name); + + /* Not sure if there are layout specific things, so for now just reporting + * all layouts from extensions. 
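+ * (These entries back the pCopySrcLayouts/pCopyDstLayouts host image copy
+ * properties assigned right below; the same list is reused for both
+ * directions.)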
+ */ + static const VkImageLayout supported_layouts[] = { + VK_IMAGE_LAYOUT_GENERAL, /* this one is required by spec */ + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_IMAGE_LAYOUT_PREINITIALIZED, + VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT, + VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT, + }; + + properties->pCopySrcLayouts = (VkImageLayout *)supported_layouts; + properties->copySrcLayoutCount = ARRAY_SIZE(supported_layouts); + properties->pCopyDstLayouts = (VkImageLayout *)supported_layouts; + properties->copyDstLayoutCount = ARRAY_SIZE(supported_layouts); + + STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE); + memcpy(properties->optimalTilingLayoutUUID, instance->driver_build_sha, + VK_UUID_SIZE); + + properties->identicalMemoryTypeRequirements = false; + + /* VK_EXT_shader_module_identifier */ + STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) == + sizeof(properties->shaderModuleIdentifierAlgorithmUUID)); + memcpy(properties->shaderModuleIdentifierAlgorithmUUID, + vk_shaderModuleIdentifierAlgorithmUUID, + sizeof(properties->shaderModuleIdentifierAlgorithmUUID)); + + const struct { + uint64_t registry_id; + uint64_t pad; + } dev_uuid = { + .registry_id = mtl_device_get_registry_id(pdev->mtl_dev_handle), + }; + STATIC_ASSERT(sizeof(dev_uuid) == VK_UUID_SIZE); + memcpy(properties->deviceUUID, &dev_uuid, VK_UUID_SIZE); + STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE); + memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE); + + snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "KosmicKrisp"); + snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE, + "Mesa " PACKAGE_VERSION MESA_GIT_SHA1); +} + +static void +kk_physical_device_init_pipeline_cache(struct kk_physical_device *pdev) +{ + struct kk_instance *instance = kk_physical_device_instance(pdev); + + struct mesa_sha1 sha_ctx; + _mesa_sha1_init(&sha_ctx); + + _mesa_sha1_update(&sha_ctx, instance->driver_build_sha, + sizeof(instance->driver_build_sha)); + + unsigned char sha[SHA1_DIGEST_LENGTH]; + _mesa_sha1_final(&sha_ctx, sha); + + STATIC_ASSERT(SHA1_DIGEST_LENGTH >= VK_UUID_SIZE); + memcpy(pdev->vk.properties.pipelineCacheUUID, sha, VK_UUID_SIZE); + memcpy(pdev->vk.properties.shaderBinaryUUID, sha, VK_UUID_SIZE); +} + +static void +kk_physical_device_free_disk_cache(struct kk_physical_device *pdev) +{ +#ifdef ENABLE_SHADER_CACHE + if (pdev->vk.disk_cache) { + disk_cache_destroy(pdev->vk.disk_cache); + pdev->vk.disk_cache = NULL; + } +#else + assert(pdev->vk.disk_cache == NULL); +#endif +} + +static uint64_t +kk_get_sysmem_heap_size(void) +{ + uint64_t sysmem_size_B = 0; + if (!os_get_total_physical_memory(&sysmem_size_B)) + return 0; + + /* Use 3/4 of total size to avoid swapping */ + return ROUND_DOWN_TO(sysmem_size_B * 3 / 4, 1 << 20); +} + +static uint64_t +kk_get_sysmem_heap_available(struct kk_physical_device *pdev) +{ + uint64_t sysmem_size_B = 
0; + if (!os_get_available_system_memory(&sysmem_size_B)) { + vk_loge(VK_LOG_OBJS(pdev), "Failed to query available system memory"); + return 0; + } + + /* Use 3/4 of available to avoid swapping */ + return ROUND_DOWN_TO(sysmem_size_B * 3 / 4, 1 << 20); +} + +static void +get_metal_limits(struct kk_physical_device *pdev) +{ + struct mtl_size workgroup_size = + mtl_device_max_threads_per_threadgroup(pdev->mtl_dev_handle); + pdev->info.max_workgroup_count[0] = workgroup_size.x; + pdev->info.max_workgroup_count[1] = workgroup_size.y; + pdev->info.max_workgroup_count[2] = workgroup_size.z; + pdev->info.max_workgroup_invocations = + MAX3(workgroup_size.x, workgroup_size.y, workgroup_size.z); +} + +VkResult +kk_enumerate_physical_devices(struct vk_instance *_instance) +{ + struct kk_instance *instance = (struct kk_instance *)_instance; + VkResult result; + + struct kk_physical_device *pdev = + vk_zalloc(&instance->vk.alloc, sizeof(*pdev), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + + if (pdev == NULL) { + return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + pdev->mtl_dev_handle = mtl_device_create(); + if (!pdev->mtl_dev_handle) { + result = VK_SUCCESS; + goto fail_alloc; + } + get_metal_limits(pdev); + + struct vk_physical_device_dispatch_table dispatch_table; + vk_physical_device_dispatch_table_from_entrypoints( + &dispatch_table, &kk_physical_device_entrypoints, true); + vk_physical_device_dispatch_table_from_entrypoints( + &dispatch_table, &wsi_physical_device_entrypoints, false); + + struct vk_device_extension_table supported_extensions; + kk_get_device_extensions(instance, &supported_extensions); + + struct vk_features supported_features; + kk_get_device_features(&supported_extensions, &supported_features); + + struct vk_properties properties; + kk_get_device_properties(pdev, instance, &properties); + + properties.drmHasRender = false; + + result = vk_physical_device_init(&pdev->vk, &instance->vk, + &supported_extensions, &supported_features, + &properties, &dispatch_table); + if (result != VK_SUCCESS) + goto fail_mtl_dev; + + uint64_t sysmem_size_B = kk_get_sysmem_heap_size(); + if (sysmem_size_B == 0) { + result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, + "Failed to query total system memory"); + goto fail_disk_cache; + } + + uint32_t sysmem_heap_idx = pdev->mem_heap_count++; + pdev->mem_heaps[sysmem_heap_idx] = (struct kk_memory_heap){ + .size = sysmem_size_B, + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, + .available = kk_get_sysmem_heap_available, + }; + + pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType){ + .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT | + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + .heapIndex = sysmem_heap_idx, + }; + + assert(pdev->mem_heap_count <= ARRAY_SIZE(pdev->mem_heaps)); + assert(pdev->mem_type_count <= ARRAY_SIZE(pdev->mem_types)); + + pdev->queue_families[pdev->queue_family_count++] = (struct kk_queue_family){ + .queue_flags = + VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT, + .queue_count = 1, + }; + assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families)); + + pdev->sync_binary_type = vk_sync_binary_get_type(&kk_sync_type); + unsigned st_idx = 0; + pdev->sync_types[st_idx++] = &kk_sync_type; + pdev->sync_types[st_idx++] = &pdev->sync_binary_type.sync; + pdev->sync_types[st_idx++] = NULL; + assert(st_idx <= ARRAY_SIZE(pdev->sync_types)); + pdev->vk.supported_sync_types = pdev->sync_types; + + result = 
kk_init_wsi(pdev);
+ if (result != VK_SUCCESS)
+ goto fail_disk_cache;
+
+ list_add(&pdev->vk.link, &instance->vk.physical_devices.list);
+
+ return VK_SUCCESS;
+
+fail_disk_cache:
+ vk_physical_device_finish(&pdev->vk);
+fail_mtl_dev:
+ mtl_release(pdev->mtl_dev_handle);
+fail_alloc:
+ vk_free(&instance->vk.alloc, pdev);
+ return result;
+}
+
+void
+kk_physical_device_destroy(struct vk_physical_device *vk_pdev)
+{
+ struct kk_physical_device *pdev =
+ container_of(vk_pdev, struct kk_physical_device, vk);
+
+ kk_finish_wsi(pdev);
+ kk_physical_device_free_disk_cache(pdev);
+ vk_physical_device_finish(&pdev->vk);
+ mtl_release(pdev->mtl_dev_handle);
+ vk_free(&pdev->vk.instance->alloc, pdev);
+}
+
+VKAPI_ATTR void VKAPI_CALL
+kk_GetPhysicalDeviceMemoryProperties2(
+ VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
+{
+ VK_FROM_HANDLE(kk_physical_device, pdev, physicalDevice);
+
+ pMemoryProperties->memoryProperties.memoryHeapCount = pdev->mem_heap_count;
+ for (int i = 0; i < pdev->mem_heap_count; i++) {
+ pMemoryProperties->memoryProperties.memoryHeaps[i] = (VkMemoryHeap){
+ .size = pdev->mem_heaps[i].size,
+ .flags = pdev->mem_heaps[i].flags,
+ };
+ }
+
+ pMemoryProperties->memoryProperties.memoryTypeCount = pdev->mem_type_count;
+ for (int i = 0; i < pdev->mem_type_count; i++) {
+ pMemoryProperties->memoryProperties.memoryTypes[i] = pdev->mem_types[i];
+ }
+
+ vk_foreach_struct(ext, pMemoryProperties->pNext) {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
+ VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = (void *)ext;
+
+ for (unsigned i = 0; i < pdev->mem_heap_count; i++) {
+ const struct kk_memory_heap *heap = &pdev->mem_heaps[i];
+ uint64_t used = p_atomic_read(&heap->used);
+
+ /* From the Vulkan 1.3.278 spec:
+ *
+ * "heapUsage is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
+ * values in which memory usages are returned, with one element
+ * for each memory heap. A heap’s usage is an estimate of how
+ * much memory the process is currently using in that heap."
+ *
+ * TODO: Include internal allocations?
+ */
+ p->heapUsage[i] = used;
+
+ uint64_t available = heap->size;
+ if (heap->available)
+ available = heap->available(pdev);
+
+ /* From the Vulkan 1.3.278 spec:
+ *
+ * "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
+ * values in which memory budgets are returned, with one
+ * element for each memory heap. A heap’s budget is a rough
+ * estimate of how much memory the process can allocate from
+ * that heap before allocations may fail or cause performance
+ * degradation. The budget includes any currently allocated
+ * device memory."
+ *
+ * and
+ *
+ * "The heapBudget value must be less than or equal to
+ * VkMemoryHeap::size for each heap."
+ *
+ * available (queried above) is the total amount of free memory
+ * system-wide and does not include our allocations, so we need
+ * to add that in.
+ */
+ uint64_t budget = MIN2(available + used, heap->size);
+
+ /* Set the budget at 90% of available to avoid thrashing */
+ p->heapBudget[i] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20);
+ }
+
+ /* From the Vulkan 1.3.278 spec:
+ *
+ * "The heapBudget and heapUsage values must be zero for array
+ * elements greater than or equal to
+ * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The
+ * heapBudget value must be non-zero for array elements less than
+ * VkPhysicalDeviceMemoryProperties::memoryHeapCount."
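+ *
+ * Purely as an illustration (made-up numbers): with a single 16 GiB
+ * heap, 2 GiB already allocated by us and 6 GiB reported free
+ * system-wide, we report heapUsage[0] = 2 GiB and heapBudget[0] =
+ * MIN2(6 + 2, 16) GiB scaled by 9/10, i.e. roughly 7.2 GiB, while every
+ * heapUsage/heapBudget entry past memoryHeapCount is zeroed below.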
+ */ + for (unsigned i = pdev->mem_heap_count; i < VK_MAX_MEMORY_HEAPS; i++) { + p->heapBudget[i] = 0u; + p->heapUsage[i] = 0u; + } + break; + } + default: + vk_debug_ignored_stype(ext->sType); + break; + } + } +} + +VKAPI_ATTR void VKAPI_CALL +kk_GetPhysicalDeviceQueueFamilyProperties2( + VkPhysicalDevice physicalDevice, uint32_t *pQueueFamilyPropertyCount, + VkQueueFamilyProperties2 *pQueueFamilyProperties) +{ + VK_FROM_HANDLE(kk_physical_device, pdev, physicalDevice); + VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties, + pQueueFamilyPropertyCount); + + for (uint8_t i = 0; i < pdev->queue_family_count; i++) { + const struct kk_queue_family *queue_family = &pdev->queue_families[i]; + + vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) + { + p->queueFamilyProperties.queueFlags = queue_family->queue_flags; + p->queueFamilyProperties.queueCount = queue_family->queue_count; + p->queueFamilyProperties.timestampValidBits = + 0; /* TODO_KOSMICKRISP Timestamp queries */ + p->queueFamilyProperties.minImageTransferGranularity = + (VkExtent3D){1, 1, 1}; + } + } +} + +static const VkTimeDomainKHR kk_time_domains[] = { + VK_TIME_DOMAIN_DEVICE_KHR, + VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR, +#ifdef CLOCK_MONOTONIC_RAW + VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR, +#endif +}; + +VKAPI_ATTR VkResult VKAPI_CALL +kk_GetPhysicalDeviceCalibrateableTimeDomainsKHR(VkPhysicalDevice physicalDevice, + uint32_t *pTimeDomainCount, + VkTimeDomainKHR *pTimeDomains) +{ + VK_OUTARRAY_MAKE_TYPED(VkTimeDomainKHR, out, pTimeDomains, pTimeDomainCount); + + for (int d = 0; d < ARRAY_SIZE(kk_time_domains); d++) { + vk_outarray_append_typed(VkTimeDomainKHR, &out, i) + { + *i = kk_time_domains[d]; + } + } + + return vk_outarray_status(&out); +} + +VKAPI_ATTR void VKAPI_CALL +kk_GetPhysicalDeviceMultisamplePropertiesEXT( + VkPhysicalDevice physicalDevice, VkSampleCountFlagBits samples, + VkMultisamplePropertiesEXT *pMultisampleProperties) +{ + VK_FROM_HANDLE(kk_physical_device, pdev, physicalDevice); + + if (samples & pdev->vk.properties.sampleLocationSampleCounts) { + pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){1, 1}; + } else { + pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0}; + } +} diff --git a/src/kosmickrisp/vulkan/kk_physical_device.h b/src/kosmickrisp/vulkan/kk_physical_device.h new file mode 100644 index 00000000000..9daf6dfbdcc --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_physical_device.h @@ -0,0 +1,91 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_PHYSICAL_DEVICE_H +#define KK_PHYSICAL_DEVICE_H 1 + +#include "kk_private.h" + +#include "kosmickrisp/bridge/mtl_types.h" + +#include "vk_physical_device.h" +#include "vk_sync.h" +#include "vk_sync_binary.h" + +#include "wsi_common.h" + +#include <sys/types.h> + +struct kk_instance; +struct kk_physical_device; + +struct kk_queue_family { + VkQueueFlags queue_flags; + uint32_t queue_count; +}; + +struct kk_memory_heap { + uint64_t size; + uint64_t used; + VkMemoryHeapFlags flags; + uint64_t (*available)(struct kk_physical_device *pdev); +}; + +struct kk_device_info { + uint32_t max_workgroup_count[3]; + uint32_t max_workgroup_invocations; +}; + +struct kk_physical_device { + struct vk_physical_device vk; + mtl_device *mtl_dev_handle; + struct kk_device_info info; + + struct wsi_device wsi_device; + + uint8_t device_uuid[VK_UUID_SIZE]; + + // TODO: add mapable VRAM heap if possible + struct kk_memory_heap mem_heaps[3]; + VkMemoryType mem_types[3]; + uint8_t mem_heap_count; + uint8_t mem_type_count; + + // Emulated binary sync type + struct vk_sync_binary_type sync_binary_type; + const struct vk_sync_type *sync_types[3]; + + struct kk_queue_family queue_families[3]; + uint8_t queue_family_count; +}; + +static inline uint32_t +kk_min_cbuf_alignment() +{ + /* Size of vec4 */ + return 16; +} + +VK_DEFINE_HANDLE_CASTS(kk_physical_device, vk.base, VkPhysicalDevice, + VK_OBJECT_TYPE_PHYSICAL_DEVICE) + +static inline struct kk_instance * +kk_physical_device_instance(struct kk_physical_device *pdev) +{ + return (struct kk_instance *)pdev->vk.instance; +} + +VkResult kk_enumerate_physical_devices(struct vk_instance *_instance); +void kk_physical_device_destroy(struct vk_physical_device *vk_device); + +#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \ + defined(VK_USE_PLATFORM_XCB_KHR) || defined(VK_USE_PLATFORM_XLIB_KHR) || \ + defined(VK_USE_PLATFORM_DISPLAY_KHR) || defined(VK_USE_PLATFORM_METAL_EXT) +#define KK_USE_WSI_PLATFORM +#endif + +#endif // KK_PHYSICAL_DEVICE_H diff --git a/src/kosmickrisp/vulkan/kk_private.h b/src/kosmickrisp/vulkan/kk_private.h new file mode 100644 index 00000000000..d50481f6f8a --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_private.h @@ -0,0 +1,95 @@ +/* + * Copyright © 2024 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_PRIVATE_H +#define KK_PRIVATE_H 1 + +#include "kosmickrisp/bridge/mtl_types.h" + +#include "vk_log.h" +#include "vk_util.h" + +#include <assert.h> + +#define KK_MAX_SETS 32 +#define KK_MAX_PUSH_SIZE 128 +#define KK_MAX_DYNAMIC_BUFFERS 64 +#define KK_MAX_RTS 8 +#define KK_MAX_SAMPLES 8 +#define KK_MIN_SSBO_ALIGNMENT 16 +#define KK_MIN_TEXEL_BUFFER_ALIGNMENT 16 +#define KK_MIN_UBO_ALIGNMENT 64 +#define KK_MAX_VIEWPORTS 16 +#define KK_MAX_DESCRIPTOR_SIZE 64 +#define KK_MAX_PUSH_DESCRIPTORS 32 +#define KK_MAX_DESCRIPTOR_SET_SIZE (1u << 30) +#define KK_MAX_DESCRIPTORS (1 << 20) +#define KK_PUSH_DESCRIPTOR_SET_SIZE \ + (KK_MAX_PUSH_DESCRIPTORS * KK_MAX_DESCRIPTOR_SIZE) +#define KK_SSBO_BOUNDS_CHECK_ALIGNMENT 4 +#define KK_MAX_MULTIVIEW_VIEW_COUNT 32 +#define KK_TEXTURE_BUFFER_WIDTH (1u << 14) +#define KK_MAX_OCCLUSION_QUERIES (32768) + +#define KK_SPARSE_ADDR_SPACE_SIZE (1ull << 39) +#define KK_MAX_BUFFER_SIZE (1ull << 31) +#define KK_MAX_SHARED_SIZE (32 * 1024) + +/* Max size of a bound cbuf */ +#define KK_MAX_CBUF_SIZE (1u << 16) + +/* Metal related macros */ +#define KK_MTL_RESOURCE_OPTIONS \ + MTL_RESOURCE_STORAGE_MODE_SHARED | \ + MTL_RESOURCE_CPU_CACHE_MODE_DEFAULT_CACHE | \ + MTL_RESOURCE_TRACKING_MODE_UNTRACKED + +#define KK_MAX_CMD_BUFFERS 256 + +struct kk_addr_range { + uint64_t addr; + uint64_t range; +}; + +typedef enum kk_env_option_t { + KK_ENABLE_GPU_CAPTURE = 0, + KK_MAX_ENV_OPTIONS, +} kk_env_option_t; + +struct kk_env_option { + const char *name; + bool value; +}; + +static struct kk_env_option KK_ENV_OPTIONS[KK_MAX_ENV_OPTIONS] = { + [KK_ENABLE_GPU_CAPTURE] = + { + .name = "MESA_KOSMICKRISP_ENABLE_GPU_CAPTURE", + .value = false, + }, +}; + +static inline bool +kk_get_environment_boolean(kk_env_option_t option) +{ + assert(option >= 0 && option < KK_MAX_ENV_OPTIONS); + struct kk_env_option *opt = &KK_ENV_OPTIONS[option]; + const char *env_str = getenv(opt->name); + if (env_str) { + if (strncmp(env_str, "0", 1) != 0) { + opt->value = true; + } else { + opt->value = false; + } + } + return opt->value; +} + +#define kk_debug_ignored_stype(sType) \ + mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType)) + +#endif diff --git a/src/kosmickrisp/vulkan/kk_query_pool.c b/src/kosmickrisp/vulkan/kk_query_pool.c new file mode 100644 index 00000000000..c6653ba9d13 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_query_pool.c @@ -0,0 +1,431 @@ +/* + * Copyright 2024 Valve Corporation + * Copyright 2024 Alyssa Rosenzweig + * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_query_pool.h" + +#include "kk_bo.h" +#include "kk_buffer.h" +#include "kk_cmd_buffer.h" +#include "kk_device.h" +#include "kk_encoder.h" +#include "kk_entrypoints.h" +#include "kk_physical_device.h" +#include "kk_query_table.h" +#include "kkcl.h" + +struct kk_query_report { + uint64_t value; +}; + +static inline bool +kk_has_available(const struct kk_query_pool *pool) +{ + return pool->vk.query_type != VK_QUERY_TYPE_TIMESTAMP; +} + +uint16_t * +kk_pool_oq_index_ptr(const struct kk_query_pool *pool) +{ + return (uint16_t *)((uint8_t *)pool->bo->cpu + pool->query_start); +} + +static uint32_t +kk_reports_per_query(struct kk_query_pool *pool) +{ + switch (pool->vk.query_type) { + case VK_QUERY_TYPE_OCCLUSION: + case VK_QUERY_TYPE_TIMESTAMP: + case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: + return 1; + case VK_QUERY_TYPE_PIPELINE_STATISTICS: + return util_bitcount(pool->vk.pipeline_statistics); + case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: + // Primitives succeeded and primitives needed + return 2; + default: + UNREACHABLE("Unsupported query type"); + } +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_CreateQueryPool(VkDevice device, const VkQueryPoolCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, + VkQueryPool *pQueryPool) +{ + VK_FROM_HANDLE(kk_device, dev, device); + struct kk_query_pool *pool; + VkResult result = VK_SUCCESS; + + pool = + vk_query_pool_create(&dev->vk, pCreateInfo, pAllocator, sizeof(*pool)); + if (!pool) + return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + + bool occlusion = pCreateInfo->queryType == VK_QUERY_TYPE_OCCLUSION; + unsigned occlusion_queries = occlusion ? pCreateInfo->queryCount : 0; + + /* We place the availability first and then data */ + pool->query_start = 0; + if (kk_has_available(pool)) { + pool->query_start = align(pool->vk.query_count * sizeof(uint64_t), + sizeof(struct kk_query_report)); + } + + uint32_t reports_per_query = kk_reports_per_query(pool); + pool->query_stride = reports_per_query * sizeof(struct kk_query_report); + + if (pool->vk.query_count > 0) { + uint32_t bo_size = pool->query_start; + + /* For occlusion queries, we stick the query index remapping here */ + if (occlusion_queries) + bo_size += sizeof(uint16_t) * pool->vk.query_count; + else + bo_size += pool->query_stride * pool->vk.query_count; + + result = kk_alloc_bo(dev, &dev->vk.base, bo_size, 8, &pool->bo); + if (result != VK_SUCCESS) { + kk_DestroyQueryPool(device, kk_query_pool_to_handle(pool), pAllocator); + return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); + } + + /* TODO_KOSMICKRISP Timestamps */ + } + + uint16_t *oq_index = kk_pool_oq_index_ptr(pool); + + for (unsigned i = 0; i < occlusion_queries; ++i) { + uint64_t zero = 0; + unsigned index; + + VkResult result = + kk_query_table_add(dev, &dev->occlusion_queries, zero, &index); + + if (result != VK_SUCCESS) { + kk_DestroyQueryPool(device, kk_query_pool_to_handle(pool), pAllocator); + return vk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); + } + + /* We increment as we go so we can clean up properly if we run out */ + assert(pool->oq_queries < occlusion_queries); + oq_index[pool->oq_queries++] = index; + } + + *pQueryPool = kk_query_pool_to_handle(pool); + + return result; +} + +VKAPI_ATTR void VKAPI_CALL +kk_DestroyQueryPool(VkDevice device, VkQueryPool queryPool, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VK_FROM_HANDLE(kk_query_pool, pool, queryPool); + + if (!pool) + 
return; + + uint16_t *oq_index = kk_pool_oq_index_ptr(pool); + + for (unsigned i = 0; i < pool->oq_queries; ++i) { + kk_query_table_remove(dev, &dev->occlusion_queries, oq_index[i]); + } + + kk_destroy_bo(dev, pool->bo); + + vk_query_pool_destroy(&dev->vk, pAllocator, &pool->vk); +} + +static uint64_t * +kk_query_available_map(struct kk_query_pool *pool, uint32_t query) +{ + assert(kk_has_available(pool)); + assert(query < pool->vk.query_count); + return (uint64_t *)pool->bo->cpu + query; +} + +static uint64_t +kk_query_offset(struct kk_query_pool *pool, uint32_t query) +{ + assert(query < pool->vk.query_count); + return pool->query_start + query * pool->query_stride; +} + +static uint64_t +kk_query_report_addr(struct kk_device *dev, struct kk_query_pool *pool, + uint32_t query) +{ + if (pool->oq_queries) { + uint16_t *oq_index = kk_pool_oq_index_ptr(pool); + return dev->occlusion_queries.bo->gpu + + (oq_index[query] * sizeof(uint64_t)); + } else { + return pool->bo->gpu + kk_query_offset(pool, query); + } +} + +static uint64_t +kk_query_available_addr(struct kk_query_pool *pool, uint32_t query) +{ + assert(kk_has_available(pool)); + assert(query < pool->vk.query_count); + return pool->bo->gpu + query * sizeof(uint64_t); +} + +static struct kk_query_report * +kk_query_report_map(struct kk_device *dev, struct kk_query_pool *pool, + uint32_t query) +{ + if (pool->oq_queries) { + uint64_t *queries = (uint64_t *)(dev->occlusion_queries.bo->cpu); + uint16_t *oq_index = kk_pool_oq_index_ptr(pool); + + return (struct kk_query_report *)&queries[oq_index[query]]; + } else { + return (void *)((char *)pool->bo->cpu + kk_query_offset(pool, query)); + } +} + +VKAPI_ATTR void VKAPI_CALL +kk_ResetQueryPool(VkDevice device, VkQueryPool queryPool, uint32_t firstQuery, + uint32_t queryCount) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VK_FROM_HANDLE(kk_query_pool, pool, queryPool); + for (uint32_t i = 0; i < queryCount; i++) { + struct kk_query_report *reports = + kk_query_report_map(dev, pool, firstQuery + i); + + uint64_t value = 0; + if (kk_has_available(pool)) { + uint64_t *available = kk_query_available_map(pool, firstQuery + i); + *available = 0u; + } else { + value = UINT64_MAX; + } + + for (unsigned j = 0; j < kk_reports_per_query(pool); ++j) { + reports[j].value = value; + } + } +} + +/** + * Goes through a series of consecutive query indices in the given pool, + * setting all element values to 0 and emitting them as available. + */ +static void +emit_zero_queries(struct kk_cmd_buffer *cmd, struct kk_query_pool *pool, + uint32_t first_index, uint32_t num_queries, + bool set_available) +{ + struct kk_device *dev = kk_cmd_buffer_device(cmd); + mtl_buffer *buffer = pool->bo->map; + + for (uint32_t i = 0; i < num_queries; i++) { + uint64_t report = kk_query_report_addr(dev, pool, first_index + i); + + uint64_t value = 0; + if (kk_has_available(pool)) { + uint64_t available = kk_query_available_addr(pool, first_index + i); + kk_cmd_write(cmd, buffer, available, set_available); + } else { + value = set_available ? 0u : UINT64_MAX; + } + + /* XXX: is this supposed to happen on the begin? 
*/ + for (unsigned j = 0; j < kk_reports_per_query(pool); ++j) { + kk_cmd_write(cmd, buffer, + report + (j * sizeof(struct kk_query_report)), value); + } + } +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, + uint32_t firstQuery, uint32_t queryCount) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(kk_query_pool, pool, queryPool); + emit_zero_queries(cmd, pool, firstQuery, queryCount, false); + /* If we are not mid encoder, just upload the writes */ + if (cmd->encoder->main.last_used == KK_ENC_NONE) + upload_queue_writes(cmd); +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, + VkPipelineStageFlags2 stage, VkQueryPool queryPool, + uint32_t query) +{ + /* TODO_KOSMICKRISP */ +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, + uint32_t query, VkQueryControlFlags flags) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(kk_query_pool, pool, queryPool); + cmd->state.gfx.occlusion.mode = flags & VK_QUERY_CONTROL_PRECISE_BIT + ? MTL_VISIBILITY_RESULT_MODE_COUNTING + : MTL_VISIBILITY_RESULT_MODE_BOOLEAN; + cmd->state.gfx.dirty |= KK_DIRTY_OCCLUSION; + uint16_t *oq_index = kk_pool_oq_index_ptr(pool); + cmd->state.gfx.occlusion.index = oq_index[query]; +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, + uint32_t query) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(kk_query_pool, pool, queryPool); + cmd->state.gfx.occlusion.mode = MTL_VISIBILITY_RESULT_MODE_DISABLED; + cmd->state.gfx.dirty |= KK_DIRTY_OCCLUSION; + + /* Make the query available */ + uint64_t addr = kk_query_available_addr(pool, query); + kk_cmd_write(cmd, pool->bo->map, addr, true); +} + +static bool +kk_query_is_available(struct kk_device *dev, struct kk_query_pool *pool, + uint32_t query) +{ + if (kk_has_available(pool)) { + uint64_t *available = kk_query_available_map(pool, query); + return p_atomic_read(available) != 0; + } else { + const struct kk_query_report *report = + kk_query_report_map(dev, pool, query); + + return report->value != UINT64_MAX; + } +} + +#define KK_QUERY_TIMEOUT 2000000000ull + +static VkResult +kk_query_wait_for_available(struct kk_device *dev, struct kk_query_pool *pool, + uint32_t query) +{ + uint64_t abs_timeout_ns = os_time_get_absolute_timeout(KK_QUERY_TIMEOUT); + + while (os_time_get_nano() < abs_timeout_ns) { + if (kk_query_is_available(dev, pool, query)) + return VK_SUCCESS; + + VkResult status = vk_device_check_status(&dev->vk); + if (status != VK_SUCCESS) + return status; + } + + return vk_device_set_lost(&dev->vk, "query timeout"); +} + +static void +cpu_write_query_result(void *dst, uint32_t idx, VkQueryResultFlags flags, + uint64_t result) +{ + if (flags & VK_QUERY_RESULT_64_BIT) { + uint64_t *dst64 = dst; + dst64[idx] = result; + } else { + uint32_t *dst32 = dst; + dst32[idx] = result; + } +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_GetQueryPoolResults(VkDevice device, VkQueryPool queryPool, + uint32_t firstQuery, uint32_t queryCount, + size_t dataSize, void *pData, VkDeviceSize stride, + VkQueryResultFlags flags) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VK_FROM_HANDLE(kk_query_pool, pool, queryPool); + + if (vk_device_is_lost(&dev->vk)) + return VK_ERROR_DEVICE_LOST; + + VkResult status = VK_SUCCESS; + for (uint32_t i = 0; i < queryCount; i++) { + const uint32_t query = firstQuery + i; + + bool available = 
kk_query_is_available(dev, pool, query); + + if (!available && (flags & VK_QUERY_RESULT_WAIT_BIT)) { + status = kk_query_wait_for_available(dev, pool, query); + if (status != VK_SUCCESS) + return status; + + available = true; + } + + bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT); + + const struct kk_query_report *src = kk_query_report_map(dev, pool, query); + assert(i * stride < dataSize); + void *dst = (char *)pData + i * stride; + + uint32_t reports = kk_reports_per_query(pool); + if (write_results) { + for (uint32_t j = 0; j < reports; j++) { + cpu_write_query_result(dst, j, flags, src[j].value); + } + } + + if (!write_results) + status = VK_NOT_READY; + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) + cpu_write_query_result(dst, reports, flags, available); + } + + return status; +} + +VKAPI_ATTR void VKAPI_CALL +kk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool, + uint32_t firstQuery, uint32_t queryCount, + VkBuffer dstBuffer, VkDeviceSize dstOffset, + VkDeviceSize stride, VkQueryResultFlags flags) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, commandBuffer); + VK_FROM_HANDLE(kk_query_pool, pool, queryPool); + VK_FROM_HANDLE(kk_buffer, dst_buf, dstBuffer); + struct kk_device *dev = kk_cmd_buffer_device(cmd); + + struct kk_copy_query_pool_results_info info = { + .availability = kk_has_available(pool) ? pool->bo->gpu : 0, + .results = pool->oq_queries ? dev->occlusion_queries.bo->gpu + : pool->bo->gpu + pool->query_start, + .indices = pool->oq_queries ? pool->bo->gpu + pool->query_start : 0, + .dst_addr = dst_buf->vk.device_address + dstOffset, + .dst_stride = stride, + .first_query = firstQuery, + .flags = flags, + .reports_per_query = kk_reports_per_query(pool), + .query_count = queryCount, + }; + + util_dynarray_append(&cmd->encoder->copy_query_pool_result_infos, + struct kk_copy_query_pool_results_info, info); + util_dynarray_append(&cmd->encoder->resident_buffers, mtl_buffer *, + dst_buf->mtl_handle); + util_dynarray_append(&cmd->encoder->resident_buffers, mtl_buffer *, + pool->bo->map); + util_dynarray_append(&cmd->encoder->resident_buffers, mtl_buffer *, + dev->occlusion_queries.bo->map); + /* If we are not mid encoder, just upload the writes */ + if (cmd->encoder->main.last_used == KK_ENC_NONE) + upload_queue_writes(cmd); +} diff --git a/src/kosmickrisp/vulkan/kk_query_pool.h b/src/kosmickrisp/vulkan/kk_query_pool.h new file mode 100644 index 00000000000..509a564dcfa --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_query_pool.h @@ -0,0 +1,31 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_QUERY_POOL_H +#define KK_QUERY_POOL_H 1 + +#include "kk_private.h" + +#include "vulkan/runtime/vk_query_pool.h" + +struct kk_query_pool { + struct vk_query_pool vk; + + struct kk_bo *bo; + + uint32_t query_start; + uint32_t query_stride; + + unsigned oq_queries; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(kk_query_pool, vk.base, VkQueryPool, + VK_OBJECT_TYPE_QUERY_POOL) + +uint16_t *kk_pool_oq_index_ptr(const struct kk_query_pool *pool); + +#endif /* KK_QUERY_POOL_H */ diff --git a/src/kosmickrisp/vulkan/kk_query_table.c b/src/kosmickrisp/vulkan/kk_query_table.c new file mode 100644 index 00000000000..1ab205de0c2 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_query_table.c @@ -0,0 +1,241 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_query_table.h" + +#include "kk_device.h" +#include "kk_physical_device.h" + +static uint32_t query_size = sizeof(uint64_t); + +static VkResult +kk_query_table_grow_locked(struct kk_device *dev, struct kk_query_table *table, + uint32_t new_alloc) +{ + struct kk_bo *bo; + BITSET_WORD *new_in_use; + uint32_t *new_free_table; + VkResult result; + + assert(new_alloc <= table->max_alloc); + + const uint32_t new_mem_size = new_alloc * query_size; + result = kk_alloc_bo(dev, &dev->vk.base, new_mem_size, 256, &bo); + if (result != VK_SUCCESS) + return result; + + /* We don't allow resize */ + assert(table->bo == NULL); + table->bo = bo; + + assert((new_alloc % BITSET_WORDBITS) == 0); + const size_t new_in_use_size = BITSET_WORDS(new_alloc) * sizeof(BITSET_WORD); + new_in_use = + vk_realloc(&dev->vk.alloc, table->in_use, new_in_use_size, + sizeof(BITSET_WORD), VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_in_use == NULL) { + return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY, + "Failed to allocate query in-use set"); + } + memset((char *)new_in_use, 0, new_in_use_size); + table->in_use = new_in_use; + + const size_t new_free_table_size = new_alloc * sizeof(uint32_t); + new_free_table = + vk_realloc(&dev->vk.alloc, table->free_table, new_free_table_size, 4, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_free_table == NULL) { + return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY, + "Failed to allocate query free table"); + } + table->free_table = new_free_table; + + return VK_SUCCESS; +} + +VkResult +kk_query_table_init(struct kk_device *dev, struct kk_query_table *table, + uint32_t query_count) +{ + memset(table, 0, sizeof(*table)); + VkResult result; + + simple_mtx_init(&table->mutex, mtx_plain); + + assert(util_is_power_of_two_nonzero(query_count)); + + table->max_alloc = query_count; + table->next_query = 0; + table->free_count = 0; + + result = kk_query_table_grow_locked(dev, table, query_count); + if (result != VK_SUCCESS) { + kk_query_table_finish(dev, table); + return result; + } + + return VK_SUCCESS; +} + +void +kk_query_table_finish(struct kk_device *dev, struct kk_query_table *table) +{ + if (table->bo != NULL) + kk_destroy_bo(dev, table->bo); + vk_free(&dev->vk.alloc, table->in_use); + vk_free(&dev->vk.alloc, table->free_table); + simple_mtx_destroy(&table->mutex); +} + +static VkResult +kk_query_table_alloc_locked(struct kk_device *dev, struct kk_query_table *table, + uint32_t *index_out) +{ + while (1) { + uint32_t index; + if (table->free_count > 0) { + index = table->free_table[--table->free_count]; + } else if (table->next_query < table->max_alloc) { + index = table->next_query++; + } else { + return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY, + "Query table not large enough"); + } + + if (!BITSET_TEST(table->in_use, index)) { + BITSET_SET(table->in_use, index); + *index_out = index; + return VK_SUCCESS; + } + } +} + +static VkResult +kk_query_table_take_locked(struct kk_device *dev, struct kk_query_table *table, + uint32_t index) +{ + if (index >= table->max_alloc) { + return vk_errorf(dev, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS, + "Query %u does not exist", index); + } + + if (BITSET_TEST(table->in_use, index)) { + return vk_errorf(dev, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS, + "Query %u is already in use", index); + } else { + BITSET_SET(table->in_use, index); + return VK_SUCCESS; + } +} + +static VkResult +kk_query_table_add_locked(struct kk_device *dev, struct kk_query_table *table, + uint64_t 
value, uint32_t *index_out) +{ + VkResult result = kk_query_table_alloc_locked(dev, table, index_out); + if (result != VK_SUCCESS) + return result; + + uint64_t *map = (uint64_t *)table->bo->cpu + *index_out; + *map = value; + + return VK_SUCCESS; +} + +VkResult +kk_query_table_add(struct kk_device *dev, struct kk_query_table *table, + uint64_t value, uint32_t *index_out) +{ + simple_mtx_lock(&table->mutex); + VkResult result = kk_query_table_add_locked(dev, table, value, index_out); + simple_mtx_unlock(&table->mutex); + + return result; +} + +static VkResult +kk_query_table_insert_locked(struct kk_device *dev, + struct kk_query_table *table, uint32_t index, + uint64_t value) +{ + VkResult result = kk_query_table_take_locked(dev, table, index); + if (result != VK_SUCCESS) + return result; + + uint64_t *map = (uint64_t *)table->bo->cpu + index; + *map = value; + + return result; +} + +VkResult +kk_query_table_insert(struct kk_device *dev, struct kk_query_table *table, + uint32_t index, uint64_t value) +{ + simple_mtx_lock(&table->mutex); + VkResult result = kk_query_table_insert_locked(dev, table, index, value); + simple_mtx_unlock(&table->mutex); + + return result; +} + +static int +compar_u32(const void *_a, const void *_b) +{ + const uint32_t *a = _a, *b = _b; + return *a - *b; +} + +static void +kk_query_table_compact_free_table(struct kk_query_table *table) +{ + if (table->free_count <= 1) + return; + + qsort(table->free_table, table->free_count, sizeof(*table->free_table), + compar_u32); + + uint32_t j = 1; + for (uint32_t i = 1; i < table->free_count; i++) { + if (table->free_table[i] == table->free_table[j - 1]) + continue; + + assert(table->free_table[i] > table->free_table[j - 1]); + table->free_table[j++] = table->free_table[i]; + } + + table->free_count = j; +} + +void +kk_query_table_remove(struct kk_device *dev, struct kk_query_table *table, + uint32_t index) +{ + simple_mtx_lock(&table->mutex); + + uint64_t *map = (uint64_t *)table->bo->cpu + index; + *map = 0u; + + assert(BITSET_TEST(table->in_use, index)); + + /* There may be duplicate entries in the free table. For most operations, + * this is fine as we always consult kk_query_table::in_use when + * allocating. However, it does mean that there's nothing preventing our + * free table from growing larger than the memory we allocated for it. In + * the unlikely event that we end up with more entries than we can fit in + * the allocated space, compact the table to ensure that the new entry + * we're about to add fits. + */ + if (table->free_count >= table->max_alloc) + kk_query_table_compact_free_table(table); + assert(table->free_count < table->max_alloc); + + BITSET_CLEAR(table->in_use, index); + table->free_table[table->free_count++] = index; + + simple_mtx_unlock(&table->mutex); +} diff --git a/src/kosmickrisp/vulkan/kk_query_table.h b/src/kosmickrisp/vulkan/kk_query_table.h new file mode 100644 index 00000000000..a7d0be2cb5a --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_query_table.h @@ -0,0 +1,57 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_QUERY_TABLE_H
+#define KK_QUERY_TABLE_H 1
+
+#include "kk_private.h"
+
+#include "kk_bo.h"
+
+#include "util/bitset.h"
+#include "util/simple_mtx.h"
+
+struct kk_device;
+
+struct kk_query_table {
+ simple_mtx_t mutex;
+
+ uint32_t max_alloc; /**< Maximum possible number of queries */
+ uint32_t next_query; /**< Next unallocated query */
+ uint32_t free_count; /**< Size of free_table */
+
+ struct kk_bo *bo; /**< Memory where queries are stored */
+
+ /* Bitset of all queries currently in use. This is the single source
+ * of truth for what is and isn't free. The free_table and next_query are
+ * simply hints to make finding a free query fast. Every free
+ * query will either be above next_query or in free_table but not
+ * everything which satisfies those two criteria is actually free.
+ */
+ BITSET_WORD *in_use;
+
+ /* Stack for free query elements */
+ uint32_t *free_table;
+};
+
+VkResult kk_query_table_init(struct kk_device *dev,
+ struct kk_query_table *table,
+ uint32_t query_count);
+
+void kk_query_table_finish(struct kk_device *dev, struct kk_query_table *table);
+
+VkResult kk_query_table_add(struct kk_device *dev, struct kk_query_table *table,
+ uint64_t value, uint32_t *index_out);
+
+VkResult kk_query_table_insert(struct kk_device *dev,
+ struct kk_query_table *table, uint32_t index,
+ uint64_t value);
+
+void kk_query_table_remove(struct kk_device *dev, struct kk_query_table *table,
+ uint32_t index);
+
+#endif /* KK_QUERY_TABLE_H */
diff --git a/src/kosmickrisp/vulkan/kk_queue.c b/src/kosmickrisp/vulkan/kk_queue.c
new file mode 100644
index 00000000000..3d112c21cae
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_queue.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "kk_queue.h"
+#include "kk_buffer.h"
+#include "kk_cmd_buffer.h"
+#include "kk_device.h"
+#include "kk_encoder.h"
+#include "kk_physical_device.h"
+#include "kk_sync.h"
+
+#include "kosmickrisp/bridge/mtl_bridge.h"
+
+#include "vk_cmd_queue.h"
+
+static VkResult
+kk_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit)
+{
+ struct kk_queue *queue = container_of(vk_queue, struct kk_queue, vk);
+ struct kk_device *dev = kk_queue_device(queue);
+
+ if (vk_queue_is_lost(&queue->vk))
+ return VK_ERROR_DEVICE_LOST;
+
+ struct kk_encoder *encoder;
+ VkResult result = kk_encoder_init(dev->mtl_handle, queue, &encoder);
+ if (result != VK_SUCCESS)
+ return result;
+
+ /* Chain with previous submission */
+ if (queue->wait_fence) {
+ util_dynarray_append(&encoder->main.fences, mtl_fence *,
+ queue->wait_fence);
+ encoder->main.wait_fence = true;
+ }
+
+ for (struct vk_sync_wait *wait = submit->waits,
+ *end = submit->waits + submit->wait_count;
+ wait != end; ++wait) {
+ struct kk_sync_timeline *sync =
+ container_of(wait->sync, struct kk_sync_timeline, base);
+ mtl_encode_wait_for_event(encoder->main.cmd_buffer, sync->mtl_handle,
+ wait->wait_value);
+ }
+
+ for (uint32_t i = 0; i < submit->command_buffer_count; ++i) {
+ struct kk_cmd_buffer *cmd_buffer =
+ container_of(submit->command_buffers[i], struct kk_cmd_buffer, vk);
+ cmd_buffer->encoder = encoder;
+ /* TODO_KOSMICKRISP We need to release command buffer resources here for
+ * the following case: User records command buffers once and then reuses
+ * them multiple times. 
The resource release should be done at
+ * vkBeginCommandBuffer, but because we are recording all commands to
+ * later execute them at queue submission, the recording does not record
+ * the begin/end commands and jumps straight to the actual commands. */
+ kk_cmd_release_resources(dev, cmd_buffer);
+
+ vk_cmd_queue_execute(&cmd_buffer->vk.cmd_queue,
+ kk_cmd_buffer_to_handle(cmd_buffer),
+ &dev->vk.dispatch_table);
+ kk_encoder_end(cmd_buffer);
+ cmd_buffer->encoder = NULL;
+ }
+
+ for (uint32_t i = 0u; i < submit->signal_count; ++i) {
+ struct vk_sync_signal *signal = &submit->signals[i];
+ struct kk_sync_timeline *sync =
+ container_of(signal->sync, struct kk_sync_timeline, base);
+ mtl_encode_signal_event(encoder->main.cmd_buffer, sync->mtl_handle,
+ signal->signal_value);
+ }
+
+ /* Steal the last fence to chain with the next submission */
+ if (util_dynarray_num_elements(&encoder->main.fences, mtl_fence *) > 0)
+ queue->wait_fence = util_dynarray_pop(&encoder->main.fences, mtl_fence *);
+ kk_encoder_submit(encoder);
+
+ return VK_SUCCESS;
+}
+
+VkResult
+kk_queue_init(struct kk_device *dev, struct kk_queue *queue,
+ const VkDeviceQueueCreateInfo *pCreateInfo,
+ uint32_t index_in_family)
+{
+ VkResult result;
+
+ result = vk_queue_init(&queue->vk, &dev->vk, pCreateInfo, index_in_family);
+ if (result != VK_SUCCESS)
+ return result;
+
+ queue->main.mtl_handle =
+ mtl_new_command_queue(dev->mtl_handle, KK_MAX_CMD_BUFFERS);
+ queue->pre_gfx.mtl_handle =
+ mtl_new_command_queue(dev->mtl_handle, KK_MAX_CMD_BUFFERS);
+
+ queue->vk.driver_submit = kk_queue_submit;
+
+ return VK_SUCCESS;
+}
+
+void
+kk_queue_finish(struct kk_device *dev, struct kk_queue *queue)
+{
+ if (queue->wait_fence)
+ mtl_release(queue->wait_fence);
+ mtl_release(queue->pre_gfx.mtl_handle);
+ mtl_release(queue->main.mtl_handle);
+ vk_queue_finish(&queue->vk);
+}
diff --git a/src/kosmickrisp/vulkan/kk_queue.h b/src/kosmickrisp/vulkan/kk_queue.h
new file mode 100644
index 00000000000..0633559d867
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_queue.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc.
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef KK_QUEUE_H
+#define KK_QUEUE_H 1
+
+#include "kk_private.h"
+
+#include "kosmickrisp/bridge/mtl_types.h"
+
+#include "vk_queue.h"
+
+struct kk_queue {
+ struct vk_queue vk;
+ /* We require one queue per command buffer so we don't need to lock. Main
+ * will handle all work, but if we are in a render pass and we need to
+ * massage inputs, then pre_gfx will be used to submit compute work that
+ * handles that so we don't have to break the render encoder. */
+ struct {
+ struct mtl_command_queue *mtl_handle;
+ } main, pre_gfx;
+
+ mtl_fence *wait_fence;
+};
+
+static inline struct kk_device *
+kk_queue_device(struct kk_queue *queue)
+{
+ return (struct kk_device *)queue->vk.base.device;
+}
+
+VkResult kk_queue_init(struct kk_device *dev, struct kk_queue *queue,
+ const VkDeviceQueueCreateInfo *pCreateInfo,
+ uint32_t index_in_family);
+
+void kk_queue_finish(struct kk_device *dev, struct kk_queue *queue);
+
+#endif
diff --git a/src/kosmickrisp/vulkan/kk_sampler.c b/src/kosmickrisp/vulkan/kk_sampler.c
new file mode 100644
index 00000000000..a555cc185c6
--- /dev/null
+++ b/src/kosmickrisp/vulkan/kk_sampler.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
+ * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_sampler.h" + +#include "kk_entrypoints.h" +#include "kk_physical_device.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" +#include "kosmickrisp/bridge/vk_to_mtl_map.h" + +#include "vk_format.h" +#include "vk_sampler.h" + +#include "util/bitpack_helpers.h" +#include "util/format/format_utils.h" +#include "util/format_srgb.h" + +static bool +uses_border(const VkSamplerCreateInfo *info) +{ + return info->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || + info->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || + info->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; +} + +static bool +is_border_color_custom(VkBorderColor color, bool workaround_rgba4) +{ + switch (color) { + case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK: + /* We may need to workaround RGBA4 UNORM issues with opaque black. This + * only affects float opaque black, there are no pure integer RGBA4 + * formats to worry about. + */ + return workaround_rgba4; + + case VK_BORDER_COLOR_INT_CUSTOM_EXT: + case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT: + return true; + + default: + return false; + } +} + +static struct mtl_sampler_packed +pack_sampler_info(const struct VkSamplerCreateInfo *sampler_info) +{ + enum mtl_compare_function compare = + sampler_info->compareEnable + ? vk_compare_op_to_mtl_compare_function(sampler_info->compareOp) + : MTL_COMPARE_FUNCTION_ALWAYS; + enum mtl_sampler_mip_filter mip_filter = + sampler_info->unnormalizedCoordinates + ? MTL_SAMPLER_MIP_FILTER_NOT_MIP_MAPPED + : vk_sampler_mipmap_mode_to_mtl_sampler_mip_filter( + sampler_info->mipmapMode); + enum mtl_sampler_border_color border_color = + uses_border(sampler_info) ? vk_border_color_to_mtl_sampler_border_color( + sampler_info->borderColor) + : MTL_SAMPLER_BORDER_COLOR_OPAQUE_WHITE; + uint32_t max_anisotropy = + sampler_info->anisotropyEnable + ? 
util_next_power_of_two(MAX2(sampler_info->maxAnisotropy, 1)) + : 1u; + return (struct mtl_sampler_packed){ + .mode_u = vk_sampler_address_mode_to_mtl_sampler_address_mode( + sampler_info->addressModeU), + .mode_v = vk_sampler_address_mode_to_mtl_sampler_address_mode( + sampler_info->addressModeV), + .mode_w = vk_sampler_address_mode_to_mtl_sampler_address_mode( + sampler_info->addressModeW), + .border_color = border_color, + .min_filter = + vk_filter_to_mtl_sampler_min_mag_filter(sampler_info->minFilter), + .mag_filter = + vk_filter_to_mtl_sampler_min_mag_filter(sampler_info->magFilter), + .mip_filter = mip_filter, + .compare_func = compare, + .min_lod = sampler_info->minLod, + .max_lod = sampler_info->maxLod, + .max_anisotropy = max_anisotropy, + .normalized_coordinates = !sampler_info->unnormalizedCoordinates, + }; +} + +static mtl_sampler_descriptor * +create_sampler_descriptor(const struct mtl_sampler_packed *packed) +{ + mtl_sampler_descriptor *descriptor = mtl_new_sampler_descriptor(); + mtl_sampler_descriptor_set_normalized_coordinates( + descriptor, packed->normalized_coordinates); + mtl_sampler_descriptor_set_address_mode(descriptor, packed->mode_u, + packed->mode_v, packed->mode_w); + mtl_sampler_descriptor_set_border_color(descriptor, packed->border_color); + mtl_sampler_descriptor_set_filters(descriptor, packed->min_filter, + packed->mag_filter, packed->mip_filter); + mtl_sampler_descriptor_set_lod_clamp(descriptor, packed->min_lod, + packed->max_lod); + mtl_sampler_descriptor_set_max_anisotropy(descriptor, + packed->max_anisotropy); + mtl_sampler_descriptor_set_compare_function(descriptor, + packed->compare_func); + return descriptor; +} + +mtl_sampler * +kk_sampler_create(struct kk_device *dev, + const struct mtl_sampler_packed *packed) +{ + mtl_sampler_descriptor *desc = create_sampler_descriptor(packed); + return mtl_new_sampler(dev->mtl_handle, desc); +} + +VKAPI_ATTR VkResult VKAPI_CALL +kk_CreateSampler(VkDevice device, const VkSamplerCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkSampler *pSampler) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VkResult result = VK_SUCCESS; + struct kk_sampler *sampler; + + sampler = + vk_sampler_create(&dev->vk, pCreateInfo, pAllocator, sizeof(*sampler)); + if (!sampler) + return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + + struct mtl_sampler_packed packed = pack_sampler_info(pCreateInfo); + result = kk_sampler_heap_add(dev, packed, &sampler->planes[0].hw); + if (result != VK_SUCCESS) { + kk_DestroySampler(device, kk_sampler_to_handle(sampler), pAllocator); + return result; + } + sampler->plane_count = 1; + + /* In order to support CONVERSION_SEPARATE_RECONSTRUCTION_FILTER_BIT, we + * need multiple sampler planes: at minimum we will need one for luminance + * (the default), and one for chroma. Each sampler plane needs its own + * sampler table entry. However, sampler table entries are very rare on + * NVIDIA; we only have 4096 entries for the whole VkDevice, and each plane + * would burn one of those. So we make sure to allocate only the minimum + * amount that we actually need (i.e., either 1 or 2), and then just copy + * the last sampler plane out as far as we need to fill the number of image + * planes. 
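+ *
+ * As an illustrative example: a YCbCr sampler whose chroma filter
+ * differs from its min/mag filter gets plane 0 for luma and plane 1 for
+ * chroma; an image with more planes than that simply reuses the last
+ * sampler plane, as described above.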
+ */ + + if (sampler->vk.ycbcr_conversion) { + const VkFilter chroma_filter = + sampler->vk.ycbcr_conversion->state.chroma_filter; + if (pCreateInfo->magFilter != chroma_filter || + pCreateInfo->minFilter != chroma_filter) { + packed.min_filter = packed.mag_filter = + vk_filter_to_mtl_sampler_min_mag_filter(chroma_filter); + result = kk_sampler_heap_add(dev, packed, &sampler->planes[1].hw); + if (result != VK_SUCCESS) { + kk_DestroySampler(device, kk_sampler_to_handle(sampler), + pAllocator); + return result; + } + sampler->plane_count = 2; + } + } + + /* LOD data passed in the descriptor set */ + sampler->lod_bias_fp16 = _mesa_float_to_half(pCreateInfo->mipLodBias); + sampler->lod_min_fp16 = _mesa_float_to_half(pCreateInfo->minLod); + sampler->lod_max_fp16 = _mesa_float_to_half(pCreateInfo->maxLod); + + /* Border color passed in the descriptor */ + sampler->has_border = uses_border(pCreateInfo) && + is_border_color_custom(pCreateInfo->borderColor, true); + if (sampler->has_border) { + /* We also need to record the border. + * + * If there is a border colour component mapping, we need to swizzle with + * it. Otherwise, we can assume there's nothing to do. + */ + VkClearColorValue bc = sampler->vk.border_color_value; + + const VkSamplerBorderColorComponentMappingCreateInfoEXT *swiz_info = + vk_find_struct_const( + pCreateInfo->pNext, + SAMPLER_BORDER_COLOR_COMPONENT_MAPPING_CREATE_INFO_EXT); + + if (swiz_info) { + const bool is_int = vk_border_color_is_int(pCreateInfo->borderColor); + bc = vk_swizzle_color_value(bc, swiz_info->components, is_int); + } + + sampler->custom_border = bc; + } + + *pSampler = kk_sampler_to_handle(sampler); + + return VK_SUCCESS; +} + +VKAPI_ATTR void VKAPI_CALL +kk_DestroySampler(VkDevice device, VkSampler _sampler, + const VkAllocationCallbacks *pAllocator) +{ + VK_FROM_HANDLE(kk_device, dev, device); + VK_FROM_HANDLE(kk_sampler, sampler, _sampler); + + if (!sampler) + return; + + for (uint8_t plane = 0; plane < sampler->plane_count; plane++) + kk_sampler_heap_remove(dev, sampler->planes[plane].hw); + + vk_sampler_destroy(&dev->vk, pAllocator, &sampler->vk); +} diff --git a/src/kosmickrisp/vulkan/kk_sampler.h b/src/kosmickrisp/vulkan/kk_sampler.h new file mode 100644 index 00000000000..15f3de9e582 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_sampler.h @@ -0,0 +1,40 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_SAMPLER_H +#define KK_SAMPLER_H 1 + +#include "kk_device.h" +#include "kk_physical_device.h" +#include "kk_private.h" + +#include "kosmickrisp/bridge/mtl_types.h" + +#include "vk_sampler.h" +#include "vk_ycbcr_conversion.h" + +#include "vk_format.h" + +struct kk_sampler { + struct vk_sampler vk; + VkClearColorValue custom_border; + bool has_border; + + uint8_t plane_count; + uint16_t lod_bias_fp16; + uint16_t lod_min_fp16; + uint16_t lod_max_fp16; + + struct { + struct kk_rc_sampler *hw; + } planes[2]; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(kk_sampler, vk.base, VkSampler, + VK_OBJECT_TYPE_SAMPLER) + +#endif /* KK_SAMPLER_H */ diff --git a/src/kosmickrisp/vulkan/kk_shader.c b/src/kosmickrisp/vulkan/kk_shader.c new file mode 100644 index 00000000000..2f723599655 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_shader.c @@ -0,0 +1,1278 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_shader.h" + +#include "kk_cmd_buffer.h" +#include "kk_descriptor_set_layout.h" +#include "kk_debug.h" +#include "kk_device.h" +#include "kk_format.h" +#include "kk_nir_lower_vbo.h" +#include "kk_physical_device.h" +#include "kk_sampler.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" +#include "kosmickrisp/bridge/vk_to_mtl_map.h" +#include "kosmickrisp/compiler/nir_to_msl.h" + +#include "nir_builder.h" +#include "nir_lower_blend.h" + +#include "vk_blend.h" +#include "vk_format.h" +#include "vk_graphics_state.h" +#include "vk_nir_convert_ycbcr.h" +#include "vk_pipeline.h" + +static const nir_shader_compiler_options * +kk_get_nir_options(struct vk_physical_device *vk_pdev, mesa_shader_stage stage, + UNUSED const struct vk_pipeline_robustness_state *rs) +{ + static nir_shader_compiler_options options = { + .lower_fdph = true, + .has_fsub = true, + .has_isub = true, + .lower_extract_word = true, + .lower_extract_byte = true, + .lower_insert_word = true, + .lower_insert_byte = true, + .lower_fmod = true, + .discard_is_demote = true, + .instance_id_includes_base_index = true, + .lower_device_index_to_zero = true, + .lower_pack_64_2x32_split = true, + .lower_unpack_64_2x32_split = true, + .lower_pack_64_2x32 = true, + .lower_pack_half_2x16 = true, + .lower_pack_split = true, + .lower_unpack_half_2x16 = true, + .has_cs_global_id = true, + .lower_vector_cmp = true, + .lower_fquantize2f16 = true, + .lower_scmp = true, + .lower_ifind_msb = true, + .lower_ufind_msb = true, + .lower_find_lsb = true, + .has_uclz = true, + .lower_mul_2x32_64 = true, + .lower_uadd_carry = true, + .lower_usub_borrow = true, + /* Metal does not support double. */ + .lower_doubles_options = (nir_lower_doubles_options)(~0), + .lower_int64_options = + nir_lower_ufind_msb64 | nir_lower_subgroup_shuffle64, + }; + return &options; +} + +static struct spirv_to_nir_options +kk_get_spirv_options(struct vk_physical_device *vk_pdev, + UNUSED mesa_shader_stage stage, + const struct vk_pipeline_robustness_state *rs) +{ + return (struct spirv_to_nir_options){ + .environment = NIR_SPIRV_VULKAN, + .ssbo_addr_format = nir_address_format_64bit_bounded_global, + .phys_ssbo_addr_format = nir_address_format_64bit_global, + .ubo_addr_format = nir_address_format_64bit_bounded_global, + .shared_addr_format = nir_address_format_32bit_offset, + .min_ssbo_alignment = KK_MIN_SSBO_ALIGNMENT, + .min_ubo_alignment = KK_MIN_UBO_ALIGNMENT, + }; +} + +static void +kk_preprocess_nir(UNUSED struct vk_physical_device *vk_pdev, nir_shader *nir, + UNUSED const struct vk_pipeline_robustness_state *rs) +{ + /* Gather info before preprocess_nir but after some general lowering, so + * inputs_read and system_values_read are accurately set. + */ + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + + /* nir_lower_io_to_temporaries is required before nir_lower_blend since the + * blending pass sinks writes to the end of the block where we may have a + * jump, which is illegal. 
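+ *
+ * (The blend lowering itself runs later, from kk_lower_fs_blend() via
+ * kk_lower_fs().)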
+ */ + NIR_PASS(_, nir, nir_lower_io_vars_to_temporaries, + nir_shader_get_entrypoint(nir), true, false); + + msl_preprocess_nir(nir); +} + +struct kk_vs_key { + bool is_points; +}; + +static void +kk_populate_vs_key(struct kk_vs_key *key, + const struct vk_graphics_pipeline_state *state) +{ + memset(key, 0, sizeof(*key)); + key->is_points = + (state->ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST); +} + +struct kk_fs_key { + VkFormat color_formats[MESA_VK_MAX_COLOR_ATTACHMENTS]; + struct vk_color_blend_state color_blend; + uint32_t rasterization_samples; + uint16_t static_sample_mask; + bool has_depth; +}; + +static void +kk_populate_fs_key(struct kk_fs_key *key, + const struct vk_graphics_pipeline_state *state) +{ + memset(key, 0, sizeof(*key)); + + /* Required since we [de]serialize blend, and render target swizzle for + * non-native formats */ + memcpy(key->color_formats, state->rp->color_attachment_formats, + sizeof(key->color_formats)); + + /* Blend state gets [de]serialized, so we need to hash it */ + if (state->cb) + key->color_blend = *(state->cb); + + if (state->ms) { + key->rasterization_samples = state->ms->rasterization_samples; + key->static_sample_mask = state->ms->sample_mask; + } + + /* Depth writes are removed unless there's an actual attachment */ + key->has_depth = state->rp->depth_attachment_format != VK_FORMAT_UNDEFINED; +} + +static void +kk_hash_graphics_state(struct vk_physical_device *device, + const struct vk_graphics_pipeline_state *state, + const struct vk_features *enabled_features, + VkShaderStageFlags stages, blake3_hash blake3_out) +{ + struct mesa_blake3 blake3_ctx; + _mesa_blake3_init(&blake3_ctx); + + if (stages & VK_SHADER_STAGE_VERTEX_BIT) { + struct kk_vs_key key; + kk_populate_vs_key(&key, state); + _mesa_blake3_update(&blake3_ctx, &key, sizeof(key)); + } + + if (stages & VK_SHADER_STAGE_FRAGMENT_BIT) { + struct kk_fs_key key; + kk_populate_fs_key(&key, state); + _mesa_blake3_update(&blake3_ctx, &key, sizeof(key)); + + _mesa_blake3_update(&blake3_ctx, &state->rp->view_mask, + sizeof(state->rp->view_mask)); + } + + _mesa_blake3_final(&blake3_ctx, blake3_out); +} + +static void +shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align) +{ + assert(glsl_type_is_vector_or_scalar(type)); + + uint32_t comp_size = + glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8; + unsigned length = glsl_get_vector_elements(type); + *size = comp_size * length, *align = comp_size; +} + +struct lower_ycbcr_state { + uint32_t set_layout_count; + struct vk_descriptor_set_layout *const *set_layouts; +}; + +static const struct vk_ycbcr_conversion_state * +lookup_ycbcr_conversion(const void *_state, uint32_t set, uint32_t binding, + uint32_t array_index) +{ + const struct lower_ycbcr_state *state = _state; + assert(set < state->set_layout_count); + assert(state->set_layouts[set] != NULL); + const struct kk_descriptor_set_layout *set_layout = + vk_to_kk_descriptor_set_layout(state->set_layouts[set]); + assert(binding < set_layout->binding_count); + + const struct kk_descriptor_set_binding_layout *bind_layout = + &set_layout->binding[binding]; + + if (bind_layout->immutable_samplers == NULL) + return NULL; + + array_index = MIN2(array_index, bind_layout->array_size - 1); + + const struct kk_sampler *sampler = + bind_layout->immutable_samplers[array_index]; + + return sampler && sampler->vk.ycbcr_conversion + ? 
&sampler->vk.ycbcr_conversion->state + : NULL; +} + +static int +type_size_vec4(const struct glsl_type *type, bool bindless) +{ + return glsl_count_attribute_slots(type, false); +} + +static bool +kk_nir_swizzle_fragment_output(nir_builder *b, nir_intrinsic_instr *intrin, + void *data) +{ + if (intrin->intrinsic != nir_intrinsic_store_output && + intrin->intrinsic != nir_intrinsic_load_output) + return false; + + unsigned slot = nir_intrinsic_io_semantics(intrin).location; + if (slot < FRAG_RESULT_DATA0) + return false; + + const struct vk_graphics_pipeline_state *state = + (const struct vk_graphics_pipeline_state *)data; + VkFormat vk_format = + state->rp->color_attachment_formats[slot - FRAG_RESULT_DATA0]; + if (vk_format == VK_FORMAT_UNDEFINED) + return false; + + enum pipe_format format = vk_format_to_pipe_format(vk_format); + const struct kk_va_format *supported_format = kk_get_va_format(format); + + /* Check if we have to apply any swizzle */ + if (!supported_format->is_native) { + unsigned channel_swizzle[] = { + supported_format->swizzle.red, supported_format->swizzle.green, + supported_format->swizzle.blue, supported_format->swizzle.alpha}; + + if (intrin->intrinsic == nir_intrinsic_store_output) { + b->cursor = nir_before_instr(&intrin->instr); + nir_def *to_replace = intrin->src[0].ssa; + nir_def *swizzled = nir_swizzle(b, to_replace, channel_swizzle, + to_replace->num_components); + nir_src_rewrite(&intrin->src[0], swizzled); + } else { + unsigned channel_unswizzle[4] = {0u}; + for (uint32_t i = 0u; i < 4; ++i) + channel_unswizzle[channel_swizzle[i]] = i; + + b->cursor = nir_after_instr(&intrin->instr); + nir_def *to_replace = &intrin->def; + nir_def *swizzled = nir_swizzle(b, to_replace, channel_unswizzle, + to_replace->num_components); + nir_def_rewrite_uses_after(to_replace, swizzled); + } + return true; + } + + return false; +} + +static void +kk_lower_vs_vbo(nir_shader *nir, const struct vk_graphics_pipeline_state *state) +{ + assert(!(nir->info.inputs_read & BITFIELD64_MASK(VERT_ATTRIB_GENERIC0)) && + "Fixed-function attributes not used in Vulkan"); + NIR_PASS(_, nir, nir_recompute_io_bases, nir_var_shader_in); + /* the shader_out portion of this is load-bearing even for tess eval */ + NIR_PASS(_, nir, nir_io_add_const_offset_to_base, + nir_var_shader_in | nir_var_shader_out); + + struct kk_attribute attributes[KK_MAX_ATTRIBS] = {}; + uint64_t attribs_read = nir->info.inputs_read >> VERT_ATTRIB_GENERIC0; + u_foreach_bit(i, state->vi->attributes_valid) { + const struct vk_vertex_attribute_state *attr = &state->vi->attributes[i]; + assert(state->vi->bindings_valid & BITFIELD_BIT(attr->binding)); + const struct vk_vertex_binding_state *binding = + &state->vi->bindings[attr->binding]; + + /* nir_assign_io_var_locations compacts vertex inputs, eliminating + * unused inputs. We need to do the same here to match the locations. 
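+ *
+ * For example, if only locations 3 and 7 are read, they land in slots 0
+ * and 1; the util_bitcount64() below reproduces that numbering by counting
+ * the read attributes with a lower location.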
+ */ + unsigned slot = util_bitcount64(attribs_read & BITFIELD_MASK(i)); + attributes[slot].divisor = binding->divisor; + attributes[slot].binding = attr->binding; + attributes[slot].format = vk_format_to_pipe_format(attr->format); + attributes[slot].buf = attr->binding; + attributes[slot].instanced = + binding->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE; + } + NIR_PASS(_, nir, kk_nir_lower_vbo, attributes); +} + +static void +kk_lower_vs(nir_shader *nir, const struct vk_graphics_pipeline_state *state) +{ + if (state->ia->primitive_topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST) + nir_shader_intrinsics_pass(nir, msl_nir_vs_remove_point_size_write, + nir_metadata_control_flow, NULL); + + NIR_PASS(_, nir, msl_nir_layer_id_type); +} + +static void +kk_lower_fs_blend(nir_shader *nir, + const struct vk_graphics_pipeline_state *state) +{ + nir_lower_blend_options opts = { + .scalar_blend_const = false, + .logicop_enable = state->cb->logic_op_enable, + .logicop_func = state->cb->logic_op, + }; + + static_assert(ARRAY_SIZE(opts.format) == 8, "max RTs out of sync"); + + for (unsigned i = 0; i < ARRAY_SIZE(opts.format); ++i) { + opts.format[i] = + vk_format_to_pipe_format(state->rp->color_attachment_formats[i]); + if (state->cb->attachments[i].blend_enable) { + opts.rt[i] = (nir_lower_blend_rt){ + .rgb.src_factor = vk_blend_factor_to_pipe( + state->cb->attachments[i].src_color_blend_factor), + .rgb.dst_factor = vk_blend_factor_to_pipe( + state->cb->attachments[i].dst_color_blend_factor), + .rgb.func = + vk_blend_op_to_pipe(state->cb->attachments[i].color_blend_op), + + .alpha.src_factor = vk_blend_factor_to_pipe( + state->cb->attachments[i].src_alpha_blend_factor), + .alpha.dst_factor = vk_blend_factor_to_pipe( + state->cb->attachments[i].dst_alpha_blend_factor), + .alpha.func = + vk_blend_op_to_pipe(state->cb->attachments[i].alpha_blend_op), + + .colormask = state->cb->attachments[i].write_mask, + }; + } else { + opts.rt[i] = (nir_lower_blend_rt){ + .rgb.src_factor = PIPE_BLENDFACTOR_ONE, + .rgb.dst_factor = PIPE_BLENDFACTOR_ZERO, + .rgb.func = PIPE_BLEND_ADD, + + .alpha.src_factor = PIPE_BLENDFACTOR_ONE, + .alpha.dst_factor = PIPE_BLENDFACTOR_ZERO, + .alpha.func = PIPE_BLEND_ADD, + + .colormask = state->cb->attachments[i].write_mask, + }; + } + } + NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_out); + NIR_PASS(_, nir, nir_lower_blend, &opts); +} + +static bool +lower_subpass_dim(nir_builder *b, nir_tex_instr *tex, UNUSED void *_data) +{ + if (tex->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS) + tex->sampler_dim = GLSL_SAMPLER_DIM_2D; + else if (tex->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) + tex->sampler_dim = GLSL_SAMPLER_DIM_MS; + else + return false; + + return true; +} + +static void +kk_lower_fs(nir_shader *nir, const struct vk_graphics_pipeline_state *state) +{ + if (state->cb) + kk_lower_fs_blend(nir, state); + + if (state->rp->depth_attachment_format == VK_FORMAT_UNDEFINED || + nir->info.fs.early_fragment_tests) + NIR_PASS(_, nir, nir_shader_intrinsics_pass, + msl_nir_fs_remove_depth_write, nir_metadata_control_flow, NULL); + + /* Input attachments are treated as 2D textures. Fixes sampler dimension */ + NIR_PASS(_, nir, nir_shader_tex_pass, lower_subpass_dim, nir_metadata_all, + NULL); + + /* Swizzle non-native formats' outputs */ + NIR_PASS(_, nir, nir_shader_intrinsics_pass, kk_nir_swizzle_fragment_output, + nir_metadata_control_flow, (void *)state); + + /* Metal's sample mask is uint. 
*/ + NIR_PASS(_, nir, msl_nir_sample_mask_type); + + if (state->ms && state->ms->rasterization_samples && + state->ms->sample_mask != UINT16_MAX) + NIR_PASS(_, nir, msl_lower_static_sample_mask, state->ms->sample_mask); + /* Check https://github.com/KhronosGroup/Vulkan-Portability/issues/54 for + * explanation on why we need this. */ + else if (nir->info.fs.needs_full_quad_helper_invocations || + nir->info.fs.needs_coarse_quad_helper_invocations) + NIR_PASS(_, nir, msl_lower_static_sample_mask, 0xFFFFFFFF); +} + +static void +kk_lower_nir(struct kk_device *dev, nir_shader *nir, + const struct vk_pipeline_robustness_state *rs, + uint32_t set_layout_count, + struct vk_descriptor_set_layout *const *set_layouts, + const struct vk_graphics_pipeline_state *state) +{ + /* Massage IO related variables to please Metal */ + if (nir->info.stage == MESA_SHADER_VERTEX) { + NIR_PASS(_, nir, kk_nir_lower_vs_multiview, state->rp->view_mask); + + /* kk_nir_lower_vs_multiview may create a temporary array to assign the + * correct view index. Since we don't handle derefs, we need to get rid of + * them. */ + NIR_PASS(_, nir, nir_lower_vars_to_scratch, nir_var_function_temp, 0, + glsl_get_natural_size_align_bytes, + glsl_get_natural_size_align_bytes); + + NIR_PASS(_, nir, msl_ensure_vertex_position_output); + } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { + enum pipe_format rts[MAX_DRAW_BUFFERS] = {PIPE_FORMAT_NONE}; + const struct vk_render_pass_state *rp = state->rp; + for (uint32_t i = 0u; i < MAX_DRAW_BUFFERS; ++i) + rts[i] = vk_format_to_pipe_format(rp->color_attachment_formats[i]); + + NIR_PASS(_, nir, msl_nir_fs_force_output_signedness, rts); + + NIR_PASS(_, nir, kk_nir_lower_fs_multiview, state->rp->view_mask); + + if (state->rp->depth_attachment_format != VK_FORMAT_UNDEFINED && + state->ial && state->ial->depth_att != MESA_VK_ATTACHMENT_NO_INDEX) { + NIR_PASS(_, nir, msl_ensure_depth_write); + } + } + + const struct lower_ycbcr_state ycbcr_state = { + .set_layout_count = set_layout_count, + .set_layouts = set_layouts, + }; + NIR_PASS(_, nir, nir_vk_lower_ycbcr_tex, lookup_ycbcr_conversion, + &ycbcr_state); + + /* Common msl texture lowering needs to happen after ycbcr lowering and + * before descriptor lowering. */ + NIR_PASS(_, nir, msl_lower_textures); + + /* Lower push constants before lower_descriptors */ + NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const, + nir_address_format_32bit_offset); + + NIR_PASS(_, nir, nir_lower_memory_model); + + NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global, + nir_address_format_64bit_global); + NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo, + nir_address_format_64bit_bounded_global); + NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo, + nir_address_format_64bit_bounded_global); + + NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, + type_size_vec4, + nir_lower_io_lower_64bit_to_32 | + nir_lower_io_use_interpolated_input_intrinsics); + + if (!nir->info.shared_memory_explicit_layout) { + NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, + shared_var_info); + } + NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared, + nir_address_format_32bit_offset); + + if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) { + /* QMD::SHARED_MEMORY_SIZE requires an alignment of 256B so it's safe to + * align everything up to 16B so we can write whole vec4s. 
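+ *
+ * For example, a shared_size of 20 bytes is padded to 32 so the
+ * zero-initialization below can be done in whole 16-byte chunks.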
+ */ + nir->info.shared_size = align(nir->info.shared_size, 16); + NIR_PASS(_, nir, nir_zero_initialize_shared_memory, nir->info.shared_size, + 16); + + /* We need to call lower_compute_system_values again because + * nir_zero_initialize_shared_memory generates load_invocation_id which + * has to be lowered to load_invocation_index. + */ + NIR_PASS(_, nir, nir_lower_compute_system_values, NULL); + } + + NIR_PASS(_, nir, nir_opt_dce); + NIR_PASS(_, nir, nir_lower_variable_initializers, ~nir_var_function_temp); + NIR_PASS(_, nir, nir_remove_dead_variables, + nir_var_shader_in | nir_var_shader_out | nir_var_system_value, + NULL); + nir->info.io_lowered = true; + + /* Required before kk_nir_lower_vbo so load_input intrinsics' parents are + * load_const, otherwise the pass will complain */ + NIR_PASS(_, nir, nir_opt_constant_folding); + + /* These passes operate on lowered IO. */ + if (nir->info.stage == MESA_SHADER_VERTEX) { + kk_lower_vs(nir, state); + } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { + kk_lower_fs(nir, state); + } + + /* Descriptor lowering needs to happen after lowering blend since we will + * generate a nir_intrinsic_load_blend_const_color_rgba which gets lowered by + * the lower descriptor pass + */ + NIR_PASS(_, nir, kk_nir_lower_descriptors, rs, set_layout_count, + set_layouts); + NIR_PASS(_, nir, kk_nir_lower_textures); + + NIR_PASS(_, nir, nir_lower_global_vars_to_local); +} + +static const struct vk_shader_ops kk_shader_ops; + +static void +kk_shader_destroy(struct vk_device *vk_dev, struct vk_shader *vk_shader, + const VkAllocationCallbacks *pAllocator) +{ + struct kk_device *dev = container_of(vk_dev, struct kk_device, vk); + struct kk_shader *shader = container_of(vk_shader, struct kk_shader, vk); + + if (shader->pipeline.cs) { + mtl_release(shader->pipeline.cs); + } else if (shader->pipeline.gfx.handle) { + mtl_release(shader->pipeline.gfx.handle); + if (shader->pipeline.gfx.mtl_depth_stencil_state_handle) + mtl_release(shader->pipeline.gfx.mtl_depth_stencil_state_handle); + shader->pipeline.gfx.handle = NULL; + shader->pipeline.gfx.mtl_depth_stencil_state_handle = NULL; + } + + ralloc_free((void *)shader->msl_code); + ralloc_free((void *)shader->entrypoint_name); + + vk_shader_free(&dev->vk, pAllocator, &shader->vk); +} + +static bool +gather_vs_inputs(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + if (intr->intrinsic != nir_intrinsic_load_input) + return false; + + struct nir_io_semantics io = nir_intrinsic_io_semantics(intr); + BITSET_WORD *attribs_read = data; + BITSET_SET(attribs_read, (io.location - VERT_ATTRIB_GENERIC0)); + return false; +} + +static void +gather_shader_info(struct kk_shader *shader, nir_shader *nir, + const struct vk_graphics_pipeline_state *state) +{ + shader->info.stage = nir->info.stage; + if (nir->info.stage == MESA_SHADER_VERTEX) { + nir_shader_intrinsics_pass(nir, gather_vs_inputs, nir_metadata_all, + &shader->info.vs.attribs_read); + } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { + /* Some meta shaders like vk-meta-resolve will have depth_layout as NONE + * which is not a valid Metal layout */ + if (nir->info.fs.depth_layout == FRAG_DEPTH_LAYOUT_NONE) + nir->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY; + } else if (nir->info.stage == MESA_SHADER_COMPUTE) { + shader->info.cs.local_size.x = nir->info.workgroup_size[0]; + shader->info.cs.local_size.y = nir->info.workgroup_size[1]; + shader->info.cs.local_size.z = nir->info.workgroup_size[2]; + } +} + +static void +modify_nir_info(nir_shader *nir) +{ + 
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+ if (nir->info.stage == MESA_SHADER_VERTEX) {
+ /* Vertex attribute fetch is done in the shader through argument buffers. */
+ nir->info.inputs_read = 0u;
+ } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ /* Some meta shaders like vk-meta-resolve will have depth_layout as NONE
+ * which is not a valid Metal layout */
+ if (nir->info.fs.depth_layout == FRAG_DEPTH_LAYOUT_NONE)
+ nir->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY;
+
+ /* These values are part of the declaration and go with IO. We only
+ * require the instructions to understand interpolation mode. */
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE);
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
+ BITSET_CLEAR(nir->info.system_values_read,
+ SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE);
+ }
+}
+
+static VkResult
+kk_compile_shader(struct kk_device *dev, struct vk_shader_compile_info *info,
+ const struct vk_graphics_pipeline_state *state,
+ const VkAllocationCallbacks *pAllocator,
+ struct vk_shader **shader_out)
+{
+ struct kk_shader *shader;
+ VkResult result = VK_SUCCESS;
+
+ /* We consume the NIR, regardless of success or failure */
+ nir_shader *nir = info->nir;
+
+ shader = vk_shader_zalloc(&dev->vk, &kk_shader_ops, info->stage, pAllocator,
+ sizeof(*shader));
+ if (shader == NULL) {
+ ralloc_free(nir);
+ return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ if (nir->info.io_lowered == false)
+ kk_lower_nir(dev, nir, info->robustness, info->set_layout_count,
+ info->set_layouts, state);
+
+ gather_shader_info(shader, nir, state);
+
+ /* VBO lowering needs to go here; otherwise, the linking step removes all
+ * inputs since we read vertex attributes from UBOs. */
+ if (info->stage == MESA_SHADER_VERTEX) {
+ kk_lower_vs_vbo(nir, state);
+ }
+ msl_optimize_nir(nir);
+ modify_nir_info(nir);
+ shader->msl_code = nir_to_msl(nir, NULL);
+ const char *entrypoint_name = nir_shader_get_entrypoint(nir)->function->name;
+
+ /* We need to steal it so it doesn't get destroyed with the nir. This needs
+ * to happen after nir_to_msl since that's where we rename the entrypoint.
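+ *
+ * The string is re-parented to the NULL ralloc context, so the
+ * ralloc_free(nir) below leaves it alive; it is eventually freed in
+ * kk_shader_destroy().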
+ */ + ralloc_steal(NULL, (void *)entrypoint_name); + shader->entrypoint_name = entrypoint_name; + + if (KK_DEBUG(MSL)) + mesa_logi("%s\n", shader->msl_code); + + ralloc_free(nir); + + *shader_out = &shader->vk; + + return result; +} + +static const struct vk_pipeline_robustness_state rs_none = { + .uniform_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT, + .storage_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT, + .images = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT, +}; + +VkResult +kk_compile_nir_shader(struct kk_device *dev, nir_shader *nir, + const VkAllocationCallbacks *alloc, + struct kk_shader **shader_out) +{ + const struct kk_physical_device *pdev = kk_device_physical(dev); + + assert(nir->info.stage == MESA_SHADER_COMPUTE); + if (nir->options == NULL) + nir->options = kk_get_nir_options((struct vk_physical_device *)&pdev->vk, + nir->info.stage, &rs_none); + + struct vk_shader_compile_info info = { + .stage = nir->info.stage, + .nir = nir, + .robustness = &rs_none, + }; + + struct vk_shader *shader = NULL; + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + VkResult result = kk_compile_shader(dev, &info, NULL, alloc, &shader); + if (result != VK_SUCCESS) + return result; + + *shader_out = container_of(shader, struct kk_shader, vk); + + return VK_SUCCESS; +} + +static void +nir_opts(nir_shader *nir) +{ + bool progress; + + do { + progress = false; + + NIR_PASS(progress, nir, nir_opt_loop); + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_remove_phis); + NIR_PASS(progress, nir, nir_opt_dce); + + NIR_PASS(progress, nir, nir_opt_if, 0); + NIR_PASS(progress, nir, nir_opt_dead_cf); + NIR_PASS(progress, nir, nir_opt_cse); + + NIR_PASS(progress, nir, nir_opt_peephole_select, + &(nir_opt_peephole_select_options){ + .limit = 8, + .expensive_alu_ok = true, + .discard_ok = true, + }); + + NIR_PASS(progress, nir, nir_opt_phi_precision); + NIR_PASS(progress, nir, nir_opt_algebraic); + NIR_PASS(progress, nir, nir_opt_constant_folding); + NIR_PASS(progress, nir, nir_io_add_const_offset_to_base, + nir_var_shader_in | nir_var_shader_out); + + NIR_PASS(progress, nir, nir_opt_undef); + NIR_PASS(progress, nir, nir_opt_loop_unroll); + } while (progress); +} + +static nir_shader * +get_empty_nir(struct kk_device *dev, mesa_shader_stage stage, + const struct vk_graphics_pipeline_state *state) +{ + nir_shader *nir = nir_shader_create( + NULL, stage, + kk_get_nir_options(&kk_device_physical(dev)->vk, stage, NULL)); + + nir_function *function = nir_function_create(nir, "main_entrypoint"); + function->is_entrypoint = true; + nir_function_impl_create(function); + + const struct vk_pipeline_robustness_state no_robustness = { + .storage_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED, + .uniform_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED, + .vertex_inputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED, + .images = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED, + .null_uniform_buffer_descriptor = false, + .null_storage_buffer_descriptor = false, + }; + kk_lower_nir(dev, nir, &no_robustness, 0u, NULL, state); + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + + return nir; +} + +static VkResult +kk_compile_compute_pipeline(struct kk_device *device, struct kk_shader *shader) +{ + uint32_t local_size_threads = shader->info.cs.local_size.x * + shader->info.cs.local_size.y * + shader->info.cs.local_size.z; + mtl_library *library = mtl_new_library(device->mtl_handle, shader->msl_code); 
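+ /* A NULL library indicates Metal could not compile the generated MSL
+ * source, which is reported as VK_ERROR_INVALID_SHADER_NV. The library and
+ * function objects are transient: both are released as soon as the compute
+ * pipeline state has been created from them.
+ */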
+ if (library == NULL) + return VK_ERROR_INVALID_SHADER_NV; + + mtl_function *function = + mtl_new_function_with_name(library, shader->entrypoint_name); + shader->pipeline.cs = mtl_new_compute_pipeline_state( + device->mtl_handle, function, local_size_threads); + mtl_release(function); + mtl_release(library); + + if (shader->pipeline.cs == NULL) + return VK_ERROR_INVALID_SHADER_NV; + + return VK_SUCCESS; +} + +static bool +has_static_depth_stencil_state(const struct vk_graphics_pipeline_state *state) +{ + if (!state->ds) + return false; + + return !( + BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) | + BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) | + BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) | + BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) | + BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_OP) | + BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) | + BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)); +} + +mtl_depth_stencil_state * +kk_compile_depth_stencil_state(struct kk_device *device, + const struct vk_depth_stencil_state *ds, + bool has_depth, bool has_stencil) +{ + mtl_stencil_descriptor *front = NULL; + mtl_stencil_descriptor *back = NULL; + mtl_depth_stencil_descriptor *descriptor = + mtl_new_depth_stencil_descriptor(); + if (has_depth && ds->depth.test_enable) { + mtl_depth_stencil_descriptor_set_depth_write_enabled( + descriptor, ds->depth.write_enable); + mtl_depth_stencil_descriptor_set_depth_compare_function( + descriptor, ds->depth.compare_op); + } else { + /* Only way to disable is to always pass */ + mtl_depth_stencil_descriptor_set_depth_write_enabled(descriptor, false); + mtl_depth_stencil_descriptor_set_depth_compare_function( + descriptor, VK_COMPARE_OP_ALWAYS); + } + + if (has_stencil && ds->stencil.test_enable) { + back = mtl_new_stencil_descriptor(); + mtl_stencil_descriptor_set_depth_failure_operation( + back, ds->stencil.back.op.depth_fail); + mtl_stencil_descriptor_set_stencil_failure_operation( + back, ds->stencil.back.op.fail); + mtl_stencil_descriptor_set_depth_stencil_pass_operation( + back, ds->stencil.back.op.pass); + mtl_stencil_descriptor_set_stencil_compare_function( + back, ds->stencil.back.op.compare); + mtl_stencil_descriptor_set_read_mask(back, ds->stencil.back.compare_mask); + mtl_stencil_descriptor_set_write_mask(back, ds->stencil.back.write_mask); + mtl_depth_stencil_descriptor_set_back_face_stencil(descriptor, back); + + front = mtl_new_stencil_descriptor(); + mtl_stencil_descriptor_set_depth_failure_operation( + front, ds->stencil.front.op.depth_fail); + mtl_stencil_descriptor_set_stencil_failure_operation( + front, ds->stencil.front.op.fail); + mtl_stencil_descriptor_set_depth_stencil_pass_operation( + front, ds->stencil.front.op.pass); + mtl_stencil_descriptor_set_stencil_compare_function( + front, ds->stencil.front.op.compare); + mtl_stencil_descriptor_set_read_mask(front, + ds->stencil.front.compare_mask); + mtl_stencil_descriptor_set_write_mask(front, + ds->stencil.front.write_mask); + mtl_depth_stencil_descriptor_set_front_face_stencil(descriptor, front); + } + + mtl_depth_stencil_state *state = + mtl_new_depth_stencil_state(device->mtl_handle, descriptor); + + if (front) + mtl_release(front); + if (back) + mtl_release(back); + mtl_release(descriptor); + + return state; +} + +/* TODO_KOSMICKRISP For now we just support vertex and fragment */ +static VkResult +kk_compile_graphics_pipeline(struct kk_device 
*device, + struct kk_shader *vertex_shader, + struct kk_shader *fragment_shader, + const struct vk_graphics_pipeline_state *state) +{ + VkResult result = VK_SUCCESS; + + assert(vertex_shader->info.stage == MESA_SHADER_VERTEX && + fragment_shader->info.stage == MESA_SHADER_FRAGMENT); + + mtl_library *vertex_library = + mtl_new_library(device->mtl_handle, vertex_shader->msl_code); + if (vertex_library == NULL) + return VK_ERROR_INVALID_SHADER_NV; + + mtl_function *vertex_function = mtl_new_function_with_name( + vertex_library, vertex_shader->entrypoint_name); + + mtl_library *fragment_library = + mtl_new_library(device->mtl_handle, fragment_shader->msl_code); + if (fragment_library == NULL) { + result = VK_ERROR_INVALID_SHADER_NV; + goto destroy_vertex; + } + mtl_function *fragment_function = mtl_new_function_with_name( + fragment_library, fragment_shader->entrypoint_name); + + mtl_render_pipeline_descriptor *pipeline_descriptor = + mtl_new_render_pipeline_descriptor(); + mtl_render_pipeline_descriptor_set_vertex_shader(pipeline_descriptor, + vertex_function); + if (fragment_function) + mtl_render_pipeline_descriptor_set_fragment_shader(pipeline_descriptor, + fragment_function); + /* Layered rendering in Metal requires setting primitive topology class */ + mtl_render_pipeline_descriptor_set_input_primitive_topology( + pipeline_descriptor, + vk_primitive_topology_to_mtl_primitive_topology_class( + state->ia->primitive_topology)); + + for (uint8_t i = 0; i < state->rp->color_attachment_count; ++i) { + if (state->rp->color_attachment_formats[i] != VK_FORMAT_UNDEFINED) + mtl_render_pipeline_descriptor_set_color_attachment_format( + pipeline_descriptor, i, + vk_format_to_mtl_pixel_format( + state->rp->color_attachment_formats[i])); + } + + if (state->rp->depth_attachment_format != VK_FORMAT_UNDEFINED) + mtl_render_pipeline_descriptor_set_depth_attachment_format( + pipeline_descriptor, + vk_format_to_mtl_pixel_format(state->rp->depth_attachment_format)); + + if (state->rp->stencil_attachment_format != VK_FORMAT_UNDEFINED) + mtl_render_pipeline_descriptor_set_stencil_attachment_format( + pipeline_descriptor, + vk_format_to_mtl_pixel_format(state->rp->stencil_attachment_format)); + + if (has_static_depth_stencil_state(state)) { + bool has_depth = + state->rp->depth_attachment_format != VK_FORMAT_UNDEFINED; + bool has_stencil = + state->rp->stencil_attachment_format != VK_FORMAT_UNDEFINED; + vertex_shader->pipeline.gfx.mtl_depth_stencil_state_handle = + kk_compile_depth_stencil_state(device, state->ds, has_depth, + has_stencil); + } + + if (state->rp->view_mask) { + uint32_t max_amplification = util_bitcount(state->rp->view_mask); + mtl_render_pipeline_descriptor_set_max_vertex_amplification_count( + pipeline_descriptor, max_amplification); + } + + if (state->ms) { + mtl_render_pipeline_descriptor_set_raster_sample_count( + pipeline_descriptor, state->ms->rasterization_samples); + mtl_render_pipeline_descriptor_set_alpha_to_coverage( + pipeline_descriptor, state->ms->alpha_to_coverage_enable); + mtl_render_pipeline_descriptor_set_alpha_to_one( + pipeline_descriptor, state->ms->alpha_to_one_enable); + } + + vertex_shader->pipeline.gfx.handle = + mtl_new_render_pipeline(device->mtl_handle, pipeline_descriptor); + if (vertex_shader->pipeline.gfx.handle == NULL) + result = VK_ERROR_INVALID_SHADER_NV; + vertex_shader->pipeline.gfx.primitive_type = + vk_primitive_topology_to_mtl_primitive_type( + state->ia->primitive_topology); + + mtl_release(pipeline_descriptor); + mtl_release(fragment_function); + 
mtl_release(fragment_library);
+destroy_vertex:
+ mtl_release(vertex_function);
+ mtl_release(vertex_library);
+
+ return result;
+}
+
+static VkResult
+kk_compile_shaders(struct vk_device *device, uint32_t shader_count,
+ struct vk_shader_compile_info *infos,
+ const struct vk_graphics_pipeline_state *state,
+ const struct vk_features *enabled_features,
+ const VkAllocationCallbacks *pAllocator,
+ struct vk_shader **shaders_out)
+{
+ VkResult result = VK_SUCCESS;
+ struct kk_device *dev = container_of(device, struct kk_device, vk);
+
+ /* Vulkan doesn't require a fragment shader to build a pipeline. We may need
+ * to create one. */
+ nir_shader *null_fs = NULL;
+ nir_shader *shaders[shader_count + 1u];
+
+ /* Lower shaders, notably lowering IO. This is a prerequisite for intershader
+ * optimization. */
+ for (uint32_t i = 0u; i < shader_count; ++i) {
+ const struct vk_shader_compile_info *info = &infos[i];
+ nir_shader *nir = info->nir;
+
+ kk_lower_nir(dev, nir, info->robustness, info->set_layout_count,
+ info->set_layouts, state);
+
+ shaders[i] = nir;
+ }
+
+ /* Since we support neither GPL nor shader objects, and Metal render
+ * pipelines require both vertex and fragment stages, we may need to provide
+ * a pass-through fragment. */
+ if (state &&
+ shaders[shader_count - 1u]->info.stage != MESA_SHADER_FRAGMENT) {
+ null_fs = get_empty_nir(dev, MESA_SHADER_FRAGMENT, state);
+ shaders[shader_count] = null_fs;
+ }
+
+ uint32_t total_shaders = null_fs ? shader_count + 1 : shader_count;
+ nir_opt_varyings_bulk(shaders, total_shaders, true, UINT32_MAX, UINT32_MAX,
+ nir_opts);
+ /* A second pass is required because some dEQP-VK.glsl.matrix.sub.dynamic.*
+ * tests would otherwise fail: the vertex shader outputs a vec4 while the
+ * fragment shader reads a vec3, when in reality only a vec3 is needed. */
+ nir_opt_varyings_bulk(shaders, total_shaders, true, UINT32_MAX, UINT32_MAX,
+ nir_opts);
+
+ for (uint32_t i = 0; i < shader_count; i++) {
+ result =
+ kk_compile_shader(dev, &infos[i], state, pAllocator, &shaders_out[i]);
+ if (result != VK_SUCCESS) {
+ /* Clean up all the shaders before this point */
+ for (uint32_t j = 0; j < i; j++)
+ kk_shader_destroy(&dev->vk, shaders_out[j], pAllocator);
+
+ /* Clean up all the NIR after this point */
+ for (uint32_t j = i + 1; j < shader_count; j++)
+ ralloc_free(shaders[j]);
+
+ if (null_fs)
+ ralloc_free(null_fs);
+
+ /* Memset the output array */
+ memset(shaders_out, 0, shader_count * sizeof(*shaders_out));
+
+ return result;
+ }
+ }
+
+ /* Compile pipeline:
+ * 1. Compute pipeline
+ * 2. Graphics with all stages (since we don't support GPL or shader
+ * objects for now). This will be addressed later.
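+ *
+ * The resulting Metal objects are stored on the shaders themselves:
+ * pipeline.cs on the compute shader and pipeline.gfx on the vertex shader
+ * (kk_cmd_bind_graphics_shader() only reads pipeline state from the vertex
+ * stage).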
+ */ + if (shaders_out[0]->stage == MESA_SHADER_COMPUTE) { + result = kk_compile_compute_pipeline( + dev, container_of(shaders_out[0], struct kk_shader, vk)); + } else { + struct kk_shader *vs = container_of(shaders_out[0], struct kk_shader, vk); + struct kk_shader *fs = + container_of(shaders_out[shader_count - 1u], struct kk_shader, vk); + if (null_fs) { + struct vk_shader_compile_info info = { + .stage = MESA_SHADER_FRAGMENT, + .nir = null_fs, + .robustness = &rs_none, + }; + struct vk_shader *frag_shader; + result = + kk_compile_shader(dev, &info, state, &dev->vk.alloc, &frag_shader); + + if (result != VK_SUCCESS) { + for (uint32_t i = 0; i < shader_count; i++) + kk_shader_destroy(&dev->vk, shaders_out[i], pAllocator); + + /* Memset the output array */ + memset(shaders_out, 0, shader_count * sizeof(*shaders_out)); + + return result; + } + fs = container_of(frag_shader, struct kk_shader, vk); + } + + result = kk_compile_graphics_pipeline(dev, vs, fs, state); + + if (null_fs) + kk_shader_destroy(&dev->vk, &fs->vk, pAllocator); + } + + return result; +} + +static bool +kk_shader_serialize(struct vk_device *vk_dev, const struct vk_shader *vk_shader, + struct blob *blob) +{ + struct kk_shader *shader = container_of(vk_shader, struct kk_shader, vk); + + blob_write_bytes(blob, &shader->info, sizeof(shader->info)); + uint32_t entrypoint_length = strlen(shader->entrypoint_name) + 1; + blob_write_bytes(blob, &entrypoint_length, sizeof(entrypoint_length)); + uint32_t code_length = strlen(shader->msl_code) + 1; + blob_write_bytes(blob, &code_length, sizeof(code_length)); + blob_write_bytes(blob, shader->entrypoint_name, entrypoint_length); + blob_write_bytes(blob, shader->msl_code, code_length); + blob_write_bytes(blob, &shader->pipeline, sizeof(shader->pipeline)); + + /* We are building a new shader into the cache so we need to retain resources + */ + if (shader->info.stage == MESA_SHADER_COMPUTE) + mtl_retain(shader->pipeline.cs); + else if (shader->info.stage == MESA_SHADER_VERTEX) { + mtl_retain(shader->pipeline.gfx.handle); + if (shader->pipeline.gfx.mtl_depth_stencil_state_handle) + mtl_retain(shader->pipeline.gfx.mtl_depth_stencil_state_handle); + } + + return !blob->out_of_memory; +} + +static VkResult +kk_deserialize_shader(struct vk_device *vk_dev, struct blob_reader *blob, + uint32_t binary_version, + const VkAllocationCallbacks *pAllocator, + struct vk_shader **shader_out) +{ + struct kk_device *dev = container_of(vk_dev, struct kk_device, vk); + struct kk_shader *shader; + + struct kk_shader_info info; + blob_copy_bytes(blob, &info, sizeof(info)); + + const uint32_t entrypoint_length = blob_read_uint32(blob); + const uint32_t code_length = blob_read_uint32(blob); + if (blob->overrun) + return vk_error(dev, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT); + + shader = vk_shader_zalloc(&dev->vk, &kk_shader_ops, info.stage, pAllocator, + sizeof(*shader)); + if (shader == NULL) + return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + + shader->info = info; + + shader->entrypoint_name = ralloc_array(NULL, char, entrypoint_length); + if (shader->entrypoint_name == NULL) { + kk_shader_destroy(&dev->vk, &shader->vk, pAllocator); + return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + shader->msl_code = ralloc_array(NULL, char, code_length); + if (shader->msl_code == NULL) { + kk_shader_destroy(&dev->vk, &shader->vk, pAllocator); + return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + blob_copy_bytes(blob, (void *)shader->entrypoint_name, entrypoint_length); + blob_copy_bytes(blob, (void 
*)shader->msl_code, code_length); + blob_copy_bytes(blob, &shader->pipeline, sizeof(shader->pipeline)); + if (blob->overrun) { + kk_shader_destroy(&dev->vk, &shader->vk, pAllocator); + return vk_error(dev, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT); + } + + /* We are building a new shader so we need to retain resources */ + if (info.stage == MESA_SHADER_COMPUTE) + mtl_retain(shader->pipeline.cs); + else if (info.stage == MESA_SHADER_VERTEX) { + mtl_retain(shader->pipeline.gfx.handle); + if (shader->pipeline.gfx.mtl_depth_stencil_state_handle) + mtl_retain(shader->pipeline.gfx.mtl_depth_stencil_state_handle); + } + + *shader_out = &shader->vk; + + return VK_SUCCESS; +} + +static void +kk_cmd_bind_compute_shader(struct kk_cmd_buffer *cmd, struct kk_shader *shader) +{ + cmd->state.cs.pipeline_state = shader->pipeline.cs; + cmd->state.cs.dirty |= KK_DIRTY_PIPELINE; + cmd->state.cs.local_size = shader->info.cs.local_size; +} + +static void +kk_cmd_bind_graphics_shader(struct kk_cmd_buffer *cmd, + const mesa_shader_stage stage, + struct kk_shader *shader) +{ + /* Relevant pipeline data is only stored in vertex shaders */ + if (stage != MESA_SHADER_VERTEX) + return; + + cmd->state.gfx.primitive_type = shader->pipeline.gfx.primitive_type; + cmd->state.gfx.pipeline_state = shader->pipeline.gfx.handle; + cmd->state.gfx.vb.attribs_read = shader->info.vs.attribs_read; + + bool requires_dynamic_depth_stencil = + shader->pipeline.gfx.mtl_depth_stencil_state_handle == NULL; + if (cmd->state.gfx.is_depth_stencil_dynamic) { + /* If we are switching from dynamic to static, we need to clean up + * temporary state. Otherwise, leave the existing dynamic state + * untouched. + */ + if (!requires_dynamic_depth_stencil) { + mtl_release(cmd->state.gfx.depth_stencil_state); + cmd->state.gfx.depth_stencil_state = + shader->pipeline.gfx.mtl_depth_stencil_state_handle; + } + } else + cmd->state.gfx.depth_stencil_state = + shader->pipeline.gfx.mtl_depth_stencil_state_handle; + cmd->state.gfx.is_depth_stencil_dynamic = requires_dynamic_depth_stencil; + cmd->state.gfx.dirty |= KK_DIRTY_PIPELINE; +} + +static void +kk_cmd_bind_shaders(struct vk_command_buffer *cmd_buffer, uint32_t stage_count, + const mesa_shader_stage *stages, + struct vk_shader **const shaders) +{ + struct kk_cmd_buffer *cmd = + container_of(cmd_buffer, struct kk_cmd_buffer, vk); + + for (uint32_t i = 0; i < stage_count; i++) { + struct kk_shader *shader = container_of(shaders[i], struct kk_shader, vk); + + if (stages[i] == MESA_SHADER_COMPUTE || stages[i] == MESA_SHADER_KERNEL) + kk_cmd_bind_compute_shader(cmd, shader); + else + kk_cmd_bind_graphics_shader(cmd, stages[i], shader); + } +} + +static VkResult +kk_shader_get_executable_properties( + UNUSED struct vk_device *device, const struct vk_shader *vk_shader, + uint32_t *executable_count, VkPipelineExecutablePropertiesKHR *properties) +{ + VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out, properties, + executable_count); + + return vk_outarray_status(&out); +} + +static VkResult +kk_shader_get_executable_statistics( + UNUSED struct vk_device *device, const struct vk_shader *vk_shader, + uint32_t executable_index, uint32_t *statistic_count, + VkPipelineExecutableStatisticKHR *statistics) +{ + /* TODO_KOSMICKRISP */ + return VK_SUCCESS; +} + +static VkResult +kk_shader_get_executable_internal_representations( + UNUSED struct vk_device *device, const struct vk_shader *vk_shader, + uint32_t executable_index, uint32_t *internal_representation_count, + 
VkPipelineExecutableInternalRepresentationKHR *internal_representations) +{ + /* TODO_KOSMICKRISP */ + return VK_SUCCESS; +} + +static const struct vk_shader_ops kk_shader_ops = { + .destroy = kk_shader_destroy, + .serialize = kk_shader_serialize, + .get_executable_properties = kk_shader_get_executable_properties, + .get_executable_statistics = kk_shader_get_executable_statistics, + .get_executable_internal_representations = + kk_shader_get_executable_internal_representations, +}; + +const struct vk_device_shader_ops kk_device_shader_ops = { + .get_nir_options = kk_get_nir_options, + .get_spirv_options = kk_get_spirv_options, + .preprocess_nir = kk_preprocess_nir, + .hash_state = kk_hash_graphics_state, + .compile = + kk_compile_shaders, /* This will only generate the MSL string we need to + use for actual library generation */ + .deserialize = kk_deserialize_shader, + .cmd_set_dynamic_graphics_state = vk_cmd_set_dynamic_graphics_state, + .cmd_bind_shaders = kk_cmd_bind_shaders, +};
\ No newline at end of file diff --git a/src/kosmickrisp/vulkan/kk_shader.h b/src/kosmickrisp/vulkan/kk_shader.h new file mode 100644 index 00000000000..6015d81815b --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_shader.h @@ -0,0 +1,85 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_SHADER_H +#define KK_SHADER_H 1 + +#include "kk_device_memory.h" +#include "kk_private.h" + +#include "vk_pipeline_cache.h" + +#include "vk_shader.h" + +struct kk_shader_info { + mesa_shader_stage stage; + union { + struct { + uint32_t attribs_read; + } vs; + + struct { + struct mtl_size local_size; + } cs; + }; +}; + +struct kk_shader { + struct vk_shader vk; + const char *entrypoint_name; + const char *msl_code; + + struct kk_shader_info info; + + /* Pipeline resources. Only stored in compute or vertex shaders */ + struct { + union { + struct { + mtl_render_pipeline_state *handle; + mtl_depth_stencil_state *mtl_depth_stencil_state_handle; + enum mtl_primitive_type primitive_type; + } gfx; + mtl_compute_pipeline_state *cs; + }; + } pipeline; +}; + +VK_DEFINE_NONDISP_HANDLE_CASTS(kk_shader, vk.base, VkShaderEXT, + VK_OBJECT_TYPE_SHADER_EXT); + +extern const struct vk_device_shader_ops kk_device_shader_ops; + +static inline nir_address_format +kk_buffer_addr_format(VkPipelineRobustnessBufferBehaviorEXT robustness) +{ + switch (robustness) { + case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT: + return nir_address_format_64bit_global_32bit_offset; + case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT: + case VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT: + return nir_address_format_64bit_bounded_global; + default: + UNREACHABLE("Invalid robust buffer access behavior"); + } +} + +bool +kk_nir_lower_descriptors(nir_shader *nir, + const struct vk_pipeline_robustness_state *rs, + uint32_t set_layout_count, + struct vk_descriptor_set_layout *const *set_layouts); + +bool kk_nir_lower_textures(nir_shader *nir); + +bool kk_nir_lower_vs_multiview(nir_shader *nir, uint32_t view_mask); +bool kk_nir_lower_fs_multiview(nir_shader *nir, uint32_t view_mask); + +VkResult kk_compile_nir_shader(struct kk_device *dev, nir_shader *nir, + const VkAllocationCallbacks *alloc, + struct kk_shader **shader_out); + +#endif /* KK_SHADER_H */ diff --git a/src/kosmickrisp/vulkan/kk_sync.c b/src/kosmickrisp/vulkan/kk_sync.c new file mode 100644 index 00000000000..f44200c379e --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_sync.c @@ -0,0 +1,106 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_sync.h" + +#include "kk_device.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" + +static VkResult +kk_timeline_init(struct vk_device *device, struct vk_sync *sync, + uint64_t initial_value) +{ + struct kk_sync_timeline *timeline = + container_of(sync, struct kk_sync_timeline, base); + + struct kk_device *dev = container_of(device, struct kk_device, vk); + timeline->mtl_handle = mtl_new_shared_event(dev->mtl_handle); + mtl_shared_event_set_signaled_value(timeline->mtl_handle, initial_value); + + return VK_SUCCESS; +} + +static void +kk_timeline_finish(struct vk_device *device, struct vk_sync *sync) +{ + struct kk_sync_timeline *timeline = + container_of(sync, struct kk_sync_timeline, base); + mtl_release(timeline->mtl_handle); +} + +static VkResult +kk_timeline_signal(struct vk_device *device, struct vk_sync *sync, + uint64_t value) +{ + struct kk_sync_timeline *timeline = + container_of(sync, struct kk_sync_timeline, base); + mtl_shared_event_set_signaled_value(timeline->mtl_handle, value); + return VK_SUCCESS; +} + +static VkResult +kk_timeline_get_value(struct vk_device *device, struct vk_sync *sync, + uint64_t *value) +{ + struct kk_sync_timeline *timeline = + container_of(sync, struct kk_sync_timeline, base); + *value = mtl_shared_event_get_signaled_value(timeline->mtl_handle); + return VK_SUCCESS; +} + +static VkResult +kk_timeline_wait(struct vk_device *device, struct vk_sync *sync, + uint64_t wait_value, enum vk_sync_wait_flags wait_flags, + uint64_t abs_timeout_ns) +{ + struct kk_sync_timeline *timeline = + container_of(sync, struct kk_sync_timeline, base); + + /* abs_timeout_ns is the point in time when we should stop waiting, not the + * absolute time in ns. Therefore, we need to compute the delta from now to + * when we should stop waiting and convert to ms for Metal to be happy + * (Similar to what dzn does). + */ + uint64_t timeout_ms = 0u; + if (abs_timeout_ns == OS_TIMEOUT_INFINITE) { + timeout_ms = OS_TIMEOUT_INFINITE; + } else { + uint64_t cur_time = os_time_get_nano(); + uint64_t rel_timeout_ns = + abs_timeout_ns > cur_time ? abs_timeout_ns - cur_time : 0; + + timeout_ms = + (rel_timeout_ns / 1000000) + (rel_timeout_ns % 1000000 ? 1 : 0); + } + int completed = mtl_shared_event_wait_until_signaled_value( + timeline->mtl_handle, wait_value, timeout_ms); + + return completed != 0 ? VK_SUCCESS : VK_TIMEOUT; +} + +const struct vk_sync_type kk_sync_type = { + .size = sizeof(struct kk_sync_timeline), + .features = VK_SYNC_FEATURE_TIMELINE | VK_SYNC_FEATURE_GPU_WAIT | + VK_SYNC_FEATURE_CPU_WAIT | VK_SYNC_FEATURE_CPU_SIGNAL | + VK_SYNC_FEATURE_WAIT_PENDING | + VK_SYNC_FEATURE_WAIT_BEFORE_SIGNAL, + .init = kk_timeline_init, + .finish = kk_timeline_finish, + .signal = kk_timeline_signal, + .get_value = kk_timeline_get_value, + .reset = NULL, + .move = NULL, + .wait = kk_timeline_wait, + .wait_many = NULL, + .import_opaque_fd = NULL, + .export_opaque_fd = NULL, + .import_sync_file = NULL, + .export_sync_file = NULL, + .import_win32_handle = NULL, + .export_win32_handle = NULL, + .set_win32_export_params = NULL, +}; diff --git a/src/kosmickrisp/vulkan/kk_sync.h b/src/kosmickrisp/vulkan/kk_sync.h new file mode 100644 index 00000000000..da4f149918d --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_sync.h @@ -0,0 +1,23 @@ +/* + * Copyright 2025 LunarG, Inc. 
+ * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_SYNC_TYPES_H +#define KK_SYNC_TYPES_H 1 + +#include "kosmickrisp/bridge/mtl_types.h" + +#include "vk_sync.h" + +struct kk_queue; + +struct kk_sync_timeline { + struct vk_sync base; + mtl_shared_event *mtl_handle; +}; + +extern const struct vk_sync_type kk_sync_type; + +#endif diff --git a/src/kosmickrisp/vulkan/kk_wsi.c b/src/kosmickrisp/vulkan/kk_wsi.c new file mode 100644 index 00000000000..5912e151ef7 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_wsi.c @@ -0,0 +1,114 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "kk_wsi.h" +#include "kk_cmd_buffer.h" +#include "kk_device.h" +#include "kk_dispatch_trampolines.h" +#include "kk_image.h" +#include "kk_instance.h" +#include "wsi_common.h" + +#include "kosmickrisp/bridge/mtl_bridge.h" + +static PFN_vkVoidFunction +kk_instance_get_proc_addr_unchecked(const struct vk_instance *instance, + const char *name) +{ + PFN_vkVoidFunction func; + + if (instance == NULL || name == NULL) + return NULL; + + func = vk_instance_dispatch_table_get(&instance->dispatch_table, name); + if (func != NULL) + return func; + + func = vk_physical_device_dispatch_table_get(&kk_physical_device_trampolines, + name); + if (func != NULL) + return func; + + func = vk_device_dispatch_table_get(&kk_device_trampolines, name); + if (func != NULL) + return func; + + return NULL; +} + +static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +kk_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName) +{ + VK_FROM_HANDLE(kk_physical_device, pdev, physicalDevice); + return kk_instance_get_proc_addr_unchecked(pdev->vk.instance, pName); +} + +static VkResult +kk_bind_drawable_to_vkimage(VkImage vk_image, void *drawable) +{ + VK_FROM_HANDLE(kk_image, image, vk_image); + mtl_texture *texture = mtl_drawable_get_texture(drawable); + + /* This should only be called for swapchain binding. */ + assert(image->plane_count == 1); + struct kk_image_plane *plane = &image->planes[0]; + if (plane->mtl_handle) + mtl_release(plane->mtl_handle); + if (plane->mtl_handle_array) + mtl_release(plane->mtl_handle_array); + plane->mtl_handle = mtl_retain(texture); + plane->mtl_handle_array = NULL; + plane->addr = mtl_texture_get_gpu_resource_id(texture); + + return VK_SUCCESS; +} + +static void +kk_encode_drawable_present(VkCommandBuffer vk_cmd, void *drawable) +{ + VK_FROM_HANDLE(kk_cmd_buffer, cmd, vk_cmd); + mtl_retain(drawable); + cmd->drawable = drawable; +} + +static struct vk_queue * +kk_get_blit_queue(VkDevice device) +{ + /* We only have one queue, so just return that one. 
*/ + VK_FROM_HANDLE(kk_device, dev, device); + return &dev->queue.vk; +} + +VkResult +kk_init_wsi(struct kk_physical_device *pdev) +{ + struct wsi_device_options wsi_options = {.sw_device = false}; + struct wsi_device *wsi = &pdev->wsi_device; + VkResult result = + wsi_device_init(wsi, kk_physical_device_to_handle(pdev), kk_wsi_proc_addr, + &pdev->vk.instance->alloc, + 0u, // Not relevant for metal wsi + NULL, // Not relevant for metal + &wsi_options); + if (result != VK_SUCCESS) + return result; + + wsi->metal.bind_drawable_to_vkimage = kk_bind_drawable_to_vkimage; + wsi->metal.encode_drawable_present = kk_encode_drawable_present; + wsi->get_blit_queue = kk_get_blit_queue; + + pdev->vk.wsi_device = wsi; + + return result; +} + +void +kk_finish_wsi(struct kk_physical_device *pdev) +{ + pdev->vk.wsi_device = NULL; + wsi_device_finish(&pdev->wsi_device, &pdev->vk.instance->alloc); +} diff --git a/src/kosmickrisp/vulkan/kk_wsi.h b/src/kosmickrisp/vulkan/kk_wsi.h new file mode 100644 index 00000000000..0a9fbd214b3 --- /dev/null +++ b/src/kosmickrisp/vulkan/kk_wsi.h @@ -0,0 +1,16 @@ +/* + * Copyright © 2022 Collabora Ltd. and Red Hat Inc. + * Copyright 2025 LunarG, Inc. + * Copyright 2025 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef KK_WSI_H +#define KK_WSI_H 1 + +#include "kk_physical_device.h" + +VkResult kk_init_wsi(struct kk_physical_device *pdev); +void kk_finish_wsi(struct kk_physical_device *pdev); + +#endif /* KK_WSI_H */ diff --git a/src/kosmickrisp/vulkan/meson.build b/src/kosmickrisp/vulkan/meson.build new file mode 100644 index 00000000000..41cfa8a6fec --- /dev/null +++ b/src/kosmickrisp/vulkan/meson.build @@ -0,0 +1,211 @@ +# Copyright 2025 LunarG, Inc. +# Copyright 2025 Google LLC +# SPDX-License-Identifier: MIT + +kk_device_dispatch_table_dependencies = declare_dependency( + sources : [ + kk_dispatch_trampolines[1], + ], +) + +# Unsure if needed, just making sure we don't royally mess up the dependencies +kk_device_dispatch_table_dependencies_list = [ + vulkan_lite_runtime_deps, + kk_device_dispatch_table_dependencies, + idep_nir, + idep_mesautil, + idep_vulkan_runtime, + idep_vulkan_util, + idep_vulkan_wsi, + idep_vulkan_wsi_headers, +] + +libkk_device_dispatch_table = static_library( + 'kk_device_dispatch_table', + kk_dispatch_trampolines, + include_directories : [inc_include, inc_src], + dependencies : kk_device_dispatch_table_dependencies_list, + c_args : c_msvc_compat_args, + gnu_symbol_visibility : 'hidden', + build_by_default : false, +) + +idep_kk_device_dispatch_table = declare_dependency( + # Instruct users of this library to link with --whole-archive. Otherwise, + # our weak function overloads may not resolve properly. 
+ link_whole : [libkk_device_dispatch_table] +) + +kk_files = files( + 'kk_bo.h', + 'kk_bo.c', + 'kk_buffer_view.h', + 'kk_buffer_view.c', + 'kk_buffer.h', + 'kk_buffer.c', + 'kk_cmd_buffer.h', + 'kk_cmd_buffer.c', + 'kk_cmd_clear.c', + 'kk_cmd_copy.c', + 'kk_cmd_dispatch.c', + 'kk_cmd_draw.c', + 'kk_cmd_meta.c', + 'kk_cmd_pool.h', + 'kk_cmd_pool.c', + 'kk_debug.h', + 'kk_debug.c', + 'kk_descriptor_set_layout.h', + 'kk_descriptor_set_layout.c', + 'kk_descriptor_set.h', + 'kk_descriptor_set.c', + 'kk_device_lib.c', + 'kk_device_memory.h', + 'kk_device_memory.c', + 'kk_device.h', + 'kk_device.c', + 'kk_encoder.h', + 'kk_encoder.c', + 'kk_event.h', + 'kk_event.c', + 'kk_format.h', + 'kk_format.c', + 'kk_image_layout.h', + 'kk_image_layout.c', + 'kk_image_view.h', + 'kk_image_view.c', + 'kk_image.h', + 'kk_image.c', + 'kk_instance.h', + 'kk_instance.c', + 'kk_nir_lower_descriptors.c', + 'kk_nir_lower_multiview.c', + 'kk_nir_lower_textures.c', + 'kk_nir_lower_vbo.h', + 'kk_nir_lower_vbo.c', + 'kk_physical_device.h', + 'kk_physical_device.c', + 'kk_query_pool.h', + 'kk_query_pool.c', + 'kk_query_table.h', + 'kk_query_table.c', + 'kk_queue.h', + 'kk_queue.c', + 'kk_sampler.h', + 'kk_sampler.c', + 'kk_shader.h', + 'kk_shader.c', + 'kk_sync.h', + 'kk_sync.c', + 'kk_wsi.h', + 'kk_wsi.c', +) + +kkcl_files = files( + 'cl/kk_query.cl', + 'cl/kk_triangle_fan.cl', +) + +kk_entrypoints = custom_target( + 'kk_entrypoints', + input : [vk_entrypoints_gen, vk_api_xml], + output : ['kk_entrypoints.h', 'kk_entrypoints.c'], + command : [ + prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak', + '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'kk', + '--beta', with_vulkan_beta.to_string() + ], + depend_files : vk_entrypoints_gen_depend_files, +) + +relative_dir = fs.relative_to(meson.global_source_root(), meson.global_build_root()) + +kkcl_spv = custom_target( + input : kkcl_files, + output : 'kkcl.spv', + command : [ + prog_mesa_clc, '-o', '@OUTPUT@', '--depfile', '@DEPFILE@', kkcl_files, '--', + '-I' + join_paths(meson.project_source_root(), 'src/compiler/libcl'), + '-I' + join_paths(meson.current_source_dir(), '.'), + '-I' + join_paths(meson.project_source_root(), 'src'), + cl_args, + ], + depfile : 'libkk_shaders.h.d', +) + +kkcl = custom_target( + input : kkcl_spv, + output : ['kkcl.cpp', 'kkcl.h'], + command : [prog_vtn_bindgen2, '@INPUT@', '@OUTPUT@'], +) + +kk_deps = [ + idep_nir, + idep_mesautil, + idep_vulkan_runtime, + idep_vulkan_util, + idep_vulkan_wsi, + idep_vulkan_wsi_headers, + kk_device_dispatch_table_dependencies, + idep_kk_device_dispatch_table, + idep_msl_to_nir, + idep_mtl_bridge +] + +libkk = static_library( + 'kk', + [ + kk_files, + kk_entrypoints, + kkcl, + sha1_h, + ], + include_directories : [ + inc_include, + inc_src, + ], + dependencies : [kk_deps], + gnu_symbol_visibility : 'hidden', +) + +# Happens with macOS ld linker: +# "-Wl,-undefined,dynamic_lookup" required, otherwise, linker will complain about undefined symbols +libvulkan_kosmickrisp = shared_library( + 'vulkan_kosmickrisp', + link_whole: [libkk], + link_args: [ld_args_build_id, '-Wl,-undefined,dynamic_lookup'], + gnu_symbol_visibility : 'hidden', + install : true, +) + +kosmickrisp_mesa_icd = custom_target( + 'kosmickrisp_mesa_icd', + input : [vk_icd_gen, vk_api_xml], + output : 'kosmickrisp_mesa_icd.@0@.json'.format(host_machine.cpu()), + command : [ + prog_python, '@INPUT0@', + '--api-version', '1.3', '--xml', '@INPUT1@', + '--lib-path', get_option('prefix') / get_option('libdir') / 
'libvulkan_kosmickrisp.dylib', + '--out', '@OUTPUT@', + ], + build_by_default : true, + install_dir : with_vulkan_icd_dir, + install_tag : 'runtime', + install : true, +) + +kosmickrisp_icd = custom_target( + 'kosmickrisp_mesa_devenv_icd', + input : [vk_icd_gen, vk_api_xml], + output : 'kosmickrisp_mesa_devenv_icd.@0@.json'.format(host_machine.cpu()), + command : [ + prog_python, '@INPUT0@', + '--api-version', '1.3', '--xml', '@INPUT1@', + '--lib-path', meson.current_build_dir() / 'libvulkan_kosmickrisp.dylib', + '--out', '@OUTPUT@', + ], + build_by_default : true, +) + +devenv.append('VK_DRIVER_FILES', kosmickrisp_icd.full_path()) +# Deprecated: replaced by VK_DRIVER_FILES above +devenv.append('VK_ICD_FILENAMES', kosmickrisp_icd.full_path()) diff --git a/src/meson.build b/src/meson.build index d08e26466da..f27dae33631 100644 --- a/src/meson.build +++ b/src/meson.build @@ -114,6 +114,9 @@ endif if with_gallium_asahi or with_asahi_vk or with_tools.contains('asahi') subdir('asahi') endif +if with_kosmickrisp_vk + subdir('kosmickrisp') +endif if with_gallium subdir('mesa') endif |
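
The devenv ICD above is what makes the driver reachable inside `meson devenv`: VK_DRIVER_FILES points the Vulkan loader at kosmickrisp_mesa_devenv_icd.<cpu>.json, which in turn names libvulkan_kosmickrisp.dylib. A minimal smoke test, not part of the patch and assuming only the stock Vulkan loader and headers, is any program that enumerates physical devices, for example:

/* Minimal sketch: list whatever ICDs the loader picked up, e.g. after
 * `meson devenv` has exported VK_DRIVER_FILES to the generated
 * kosmickrisp_mesa_devenv_icd.*.json. Uses only core loader entry points. */
#include <stdio.h>
#include <vulkan/vulkan.h>

int
main(void)
{
   const VkApplicationInfo app = {
      .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
      .apiVersion = VK_API_VERSION_1_3, /* matches the --api-version in the ICD json */
   };
   const VkInstanceCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
      .pApplicationInfo = &app,
   };

   VkInstance instance;
   if (vkCreateInstance(&info, NULL, &instance) != VK_SUCCESS) {
      fprintf(stderr, "vkCreateInstance failed\n");
      return 1;
   }

   uint32_t count = 0;
   vkEnumeratePhysicalDevices(instance, &count, NULL);
   VkPhysicalDevice devices[8];
   if (count > 8)
      count = 8;
   vkEnumeratePhysicalDevices(instance, &count, devices);

   for (uint32_t i = 0; i < count; i++) {
      VkPhysicalDeviceProperties props;
      vkGetPhysicalDeviceProperties(devices[i], &props);
      printf("%u: %s (Vulkan %u.%u)\n", i, props.deviceName,
             VK_API_VERSION_MAJOR(props.apiVersion),
             VK_API_VERSION_MINOR(props.apiVersion));
   }

   vkDestroyInstance(instance, NULL);
   return 0;
}

Run inside `meson devenv` (or with VK_DRIVER_FILES exported by hand to the installed kosmickrisp_mesa_icd json), it should report the KosmicKrisp device alongside any other ICDs the loader finds.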