1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
|
/*
* Copyright © 2022 Valve Corporation
* SPDX-License-Identifier: MIT
*/
/* When using dynamic rendering with the suspend/resume functionality, we
* sometimes need to merge together multiple suspended render passes
* dynamically at submit time. This involves combining all the saved-up IBs,
* emitting the rendering commands usually emitted by
* CmdEndRenderPass()/CmdEndRendering(), and inserting them in between the
* user command buffers. This gets tricky, because the same command buffer can
* be submitted multiple times, each time with a different other set of
* command buffers, and with VK_COMMAND_BUFFER_SIMULTANEOUS_USE_BIT, this can
* happen before the previous submission of the same command buffer has
* finished. At some point we have to free these commands and the BOs they are
* contained in, and we can't do that when resubmitting the last command
* buffer in the sequence because it may still be in use. This means we have
* to make the commands owned by the device and roll our own memory tracking.
*/
#include "tu_dynamic_rendering.h"
#include "tu_cmd_buffer.h"
#include "tu_cs.h"
/* Tracks one device-owned command buffer that may still be in flight on the
 * GPU. An entry is retired (and its command buffer returned to the pool)
 * once the GPU-side dynamic_rendering_fence passes the recorded value.
 */
struct dynamic_rendering_entry {
   struct tu_cmd_buffer *cmd_buffer;
   uint32_t fence; /* The fence value when cmd_buffer becomes available */
};
/* Allocate a device-owned command buffer used to stitch suspended render
 * passes together at submit time. Before allocating, garbage-collect any
 * previously handed-out command buffers whose GPU fence has already
 * signalled, returning them to the pool. The new buffer is appended to
 * dev->dynamic_rendering_pending with the next fence value, so a later
 * call can free it once the GPU has caught up.
 */
static VkResult
get_cmd_buffer(struct tu_device *dev, struct tu_cmd_buffer **cmd_buffer_out)
{
   struct tu6_global *global = dev->global_bo_map;

   /* Note: because QueueSubmit is serialized, we don't need any locks here.
    */
   uint32_t fence = global->dynamic_rendering_fence;

   /* Go through the entries and return the finished ones to the pool,
    * shrinking the array of pending entries.
    */
   struct dynamic_rendering_entry *new_entry =
      (struct dynamic_rendering_entry *) util_dynarray_begin(
         &dev->dynamic_rendering_pending);
   uint32_t entries = 0;
   util_dynarray_foreach(&dev->dynamic_rendering_pending,
                         struct dynamic_rendering_entry, entry) {
      if (entry->fence <= fence) {
         /* GPU is done with this one: hand it back to the pool. */
         VkCommandBuffer vk_buf = tu_cmd_buffer_to_handle(entry->cmd_buffer);
         vk_common_FreeCommandBuffers(tu_device_to_handle(dev),
                                      dev->dynamic_rendering_pool, 1, &vk_buf);
      } else {
         /* Still pending: compact it toward the front of the array. The
          * write cursor never overtakes the read cursor, so the in-place
          * copy is safe.
          */
         *new_entry = *entry;
         new_entry++;
         entries++;
      }
   }
   /* Shrink the array to just the surviving (still-pending) entries. */
   UNUSED void *dummy =
      util_dynarray_resize(&dev->dynamic_rendering_pending,
                           struct dynamic_rendering_entry, entries);

   VkCommandBuffer vk_buf;
   const VkCommandBufferAllocateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .pNext = NULL,
      .commandPool = dev->dynamic_rendering_pool,
      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
      .commandBufferCount = 1,
   };
   VkResult result =
      vk_common_AllocateCommandBuffers(tu_device_to_handle(dev), &info, &vk_buf);
   if (result != VK_SUCCESS)
      return result;

   VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, vk_buf);

   /* Track the new buffer; it becomes reclaimable once the GPU writes a
    * fence value >= the one assigned here.
    */
   struct dynamic_rendering_entry entry = {
      .cmd_buffer = cmd_buffer,
      .fence = ++dev->dynamic_rendering_fence,
   };
   util_dynarray_append(&dev->dynamic_rendering_pending, entry);
   *cmd_buffer_out = cmd_buffer;

   return VK_SUCCESS;
}
VkResult
tu_init_dynamic_rendering(struct tu_device *dev)
{
dev->dynamic_rendering_pending = UTIL_DYNARRAY_INIT;
dev->dynamic_rendering_fence = 0;
const VkCommandPoolCreateInfo create_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.pNext = NULL,
.flags = 0,
.queueFamilyIndex = 0,
};
return vk_common_CreateCommandPool(tu_device_to_handle(dev), &create_info,
&dev->vk.alloc,
&dev->dynamic_rendering_pool);
}
void
tu_destroy_dynamic_rendering(struct tu_device *dev)
{
vk_common_DestroyCommandPool(tu_device_to_handle(dev),
dev->dynamic_rendering_pool,
&dev->vk.alloc);
util_dynarray_fini(&dev->dynamic_rendering_pending);
}
/* Given the user's submitted command buffers, build a new array with
 * device-owned "stitch" command buffers inserted where suspended render
 * passes need to be merged and ended. On success, *cmds_ptr and *size are
 * replaced with the new (vk_alloc'd) array; the caller owns and must free
 * it. If no command buffer in the list uses suspend/resume, the input is
 * left untouched and VK_SUCCESS is returned.
 *
 * Fix over previous version: the scratch dynarray `cmds` was leaked on the
 * get_cmd_buffer() failure path and on the vk_alloc() OOM path; both now
 * funnel through a single cleanup label.
 */
VkResult
tu_insert_dynamic_cmdbufs(struct tu_device *dev,
                          struct tu_cmd_buffer ***cmds_ptr,
                          uint32_t *size)
{
   struct tu_cmd_buffer **old_cmds = *cmds_ptr;

   /* Fast path: nothing to do unless at least one buffer participates in a
    * suspend/resume chain.
    */
   bool has_dynamic = false;
   for (unsigned i = 0; i < *size; i++) {
      if (old_cmds[i]->state.suspend_resume != SR_NONE) {
         has_dynamic = true;
         break;
      }
   }

   if (!has_dynamic)
      return VK_SUCCESS;

   VkResult result = VK_SUCCESS;
   struct util_dynarray cmds = {0};
   /* The device-owned buffer currently accumulating a suspended chain;
    * non-NULL exactly while we are "inside" an unfinished chain.
    */
   struct tu_cmd_buffer *cmd_buffer = NULL;

   for (unsigned i = 0; i < *size; i++) {
      /* First, if this user buffer ends a chain with its pre-chain, finish
       * and record the accumulated stitch buffer before the user buffer.
       */
      switch (old_cmds[i]->state.suspend_resume) {
      case SR_NONE:
      case SR_IN_CHAIN:
      case SR_IN_PRE_CHAIN:
         break;

      case SR_AFTER_PRE_CHAIN:
      case SR_IN_CHAIN_AFTER_PRE_CHAIN:
         /* A chain must already be open for its pre-chain to end it. */
         assert(cmd_buffer);
         cmd_buffer->trace_renderpass_start = u_trace_end_iterator(&cmd_buffer->rp_trace);
         tu_append_pre_chain(cmd_buffer, old_cmds[i]);
         const struct VkOffset2D *fdm_offsets =
            cmd_buffer->pre_chain.fdm_offset ?
            cmd_buffer->pre_chain.fdm_offsets : NULL;
         TU_CALLX(dev, tu_cmd_render)(cmd_buffer, fdm_offsets);

         /* Have the GPU bump the fence when this stitch buffer retires, so
          * get_cmd_buffer() can reclaim it.
          */
         tu_cs_emit_pkt7(&cmd_buffer->cs, CP_MEM_WRITE, 3);
         tu_cs_emit_qw(&cmd_buffer->cs,
                       global_iova(cmd_buffer, dynamic_rendering_fence));
         tu_cs_emit(&cmd_buffer->cs, dev->dynamic_rendering_fence);

         TU_CALLX(dev, tu_EndCommandBuffer)(tu_cmd_buffer_to_handle(cmd_buffer));
         util_dynarray_append(&cmds, cmd_buffer);
         cmd_buffer = NULL;
         break;
      }

      util_dynarray_append(&cmds, old_cmds[i]);

      /* Then, if this user buffer starts or continues a chain, open a stitch
       * buffer (if needed) and append the buffer's post-chain into it.
       */
      switch (old_cmds[i]->state.suspend_resume) {
      case SR_NONE:
      case SR_AFTER_PRE_CHAIN:
         break;
      case SR_IN_CHAIN:
      case SR_IN_CHAIN_AFTER_PRE_CHAIN: {
         assert(!cmd_buffer);
         result = get_cmd_buffer(dev, &cmd_buffer);
         if (result != VK_SUCCESS)
            goto fail; /* was a bare return: leaked `cmds` */

         const VkCommandBufferBeginInfo begin = {
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
            .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
         };
         tu_cmd_buffer_begin(cmd_buffer, &begin);

         /* Setup the render pass using the first command buffer involved in
          * the chain, so that it will look like we're inside a render pass
          * for tu_cmd_render().
          */
         tu_restore_suspended_pass(cmd_buffer, old_cmds[i]);
         FALLTHROUGH;
      }
      case SR_IN_PRE_CHAIN:
         assert(cmd_buffer);

         tu_append_pre_post_chain(cmd_buffer, old_cmds[i]);

         /* When the command buffer is finally recorded, we need its state
          * to be the state of the command buffer before it. We need this
          * because we skip tu6_emit_hw().
          */
         cmd_buffer->state.ccu_state = old_cmds[i]->state.ccu_state;
         break;
      }
   }

   struct tu_cmd_buffer **new_cmds = (struct tu_cmd_buffer **)
      vk_alloc(&dev->vk.alloc, cmds.size, alignof(struct tu_cmd_buffer *),
               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!new_cmds) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail; /* was a bare return: leaked `cmds` */
   }
   memcpy(new_cmds, cmds.data, cmds.size);
   *cmds_ptr = new_cmds;
   *size = util_dynarray_num_elements(&cmds, struct tu_cmd_buffer *);
   util_dynarray_fini(&cmds);

   return VK_SUCCESS;

fail:
   util_dynarray_fini(&cmds);
   return result;
}
|